]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IRDA]: *_DONGLE should depend on IRTTY_SIR
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug switches and lookup flags.
 *
 * RDBG() and RT6_TRACE() expand to printk() calls only when
 * RT6_DEBUG >= 3; with the value below they expand to nothing.
 */
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
/*
 * When 0, ip6_route_input()/ip6_route_output() do not call
 * rt6_alloc_clone(); they use the selected route as is.
 */
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
554cfb7e
YH
/*
 * Bits for the "strict" argument of rt6_select()/rt6_score_route():
 * _IFACE rejects a route whose device does not match oif,
 * _REACHABLE rejects a route whose nexthop neighbour is not NUD_VALID.
 */
77#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
1da177e4
LT
/* Entry count above which ip6_dst_gc() always runs and reports pressure. */
80static int ip6_rt_max_size = 4096;
/* Minimum spacing between GC runs in ip6_dst_gc() while below max_size. */
81static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once below gc_thresh. */
82static int ip6_rt_gc_timeout = 60*HZ;
/* Not referenced in the visible part of this file; non-static, so presumably used elsewhere. */
83int ip6_rt_gc_interval = 30*HZ;
/* Shift used by ip6_dst_gc() to decay its expiry on every call. */
84static int ip6_rt_gc_elasticity = 9;
/* Not referenced in the visible part of this file. */
85static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
70ceb4f5
YH
101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
1da177e4
LT
/*
 * dst operations table for IPv6 routes.  ip6_dst_alloc() passes it to
 * dst_alloc(); the hooks are the ip6_* functions declared above, and
 * ip6_dst_gc() compares .entries against .gc_thresh.
 */
109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
/*
 * Sentinel "no route" entry.  rt6_select() and rt6_device_match()
 * return it when nothing matches.  It is a reject route bound to the
 * loopback device: dst.error is -ENETUNREACH and both input and output
 * go to the packet-discard handlers.  rt6_lookup() turns it into NULL
 * because its dst.error is non-zero.
 */
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
/*
 * Root node of the IPv6 FIB used by every lookup in this file.  It
 * starts out with ip6_null_entry as its only leaf.
 */
143struct fib6_node ip6_routing_table = {
144 .leaf = &ip6_null_entry,
145 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
146};
147
148/* Protects all the ip6 fib */
149
150DEFINE_RWLOCK(rt6_lock);
151
152
153/* allocate dst with ip6_dst_ops */
154static __inline__ struct rt6_info *ip6_dst_alloc(void)
155{
156 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
157}
158
159static void ip6_dst_destroy(struct dst_entry *dst)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (idev != NULL) {
165 rt->rt6i_idev = NULL;
166 in6_dev_put(idev);
167 }
168}
169
170static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
171 int how)
172{
173 struct rt6_info *rt = (struct rt6_info *)dst;
174 struct inet6_dev *idev = rt->rt6i_idev;
175
176 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
177 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
178 if (loopback_idev != NULL) {
179 rt->rt6i_idev = loopback_idev;
180 in6_dev_put(idev);
181 }
182 }
183}
184
185static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186{
187 return (rt->rt6i_flags & RTF_EXPIRES &&
188 time_after(jiffies, rt->rt6i_expires));
189}
190
191/*
192 * Route lookup. Any rt6_lock is implied.
193 */
194
195static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
196 int oif,
197 int strict)
198{
199 struct rt6_info *local = NULL;
200 struct rt6_info *sprt;
201
202 if (oif) {
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct net_device *dev = sprt->rt6i_dev;
205 if (dev->ifindex == oif)
206 return sprt;
207 if (dev->flags & IFF_LOOPBACK) {
208 if (sprt->rt6i_idev == NULL ||
209 sprt->rt6i_idev->dev->ifindex != oif) {
210 if (strict && oif)
211 continue;
212 if (local && (!oif ||
213 local->rt6i_idev->dev->ifindex == oif))
214 continue;
215 }
216 local = sprt;
217 }
218 }
219
220 if (local)
221 return local;
222
223 if (strict)
224 return &ip6_null_entry;
225 }
226 return rt;
227}
228
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a nexthop neighbour that is not in a NUD_VALID state and
 * the last update of that neighbour is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the
 * solicited-node multicast address of the neighbour.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.  The "is it time yet?" test and the update of
 * neigh->updated are therefore done under the neighbour's WRITE lock:
 * under a read lock two CPUs could both pass the test and both send a
 * probe, and neigh->updated would be written without exclusion.
 *
 * A NULL @rt or a route without nexthop is silently ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; repeated below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation outside the neighbour lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
264
1da177e4 265/*
554cfb7e 266 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 267 */
554cfb7e
YH
268static int inline rt6_check_dev(struct rt6_info *rt, int oif)
269{
270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
277}
1da177e4 278
554cfb7e 279static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 280{
554cfb7e
YH
281 struct neighbour *neigh = rt->rt6i_nexthop;
282 int m = 0;
283 if (neigh) {
284 read_lock_bh(&neigh->lock);
285 if (neigh->nud_state & NUD_VALID)
286 m = 1;
287 read_unlock_bh(&neigh->lock);
1da177e4 288 }
554cfb7e 289 return m;
1da177e4
LT
290}
291
554cfb7e
YH
292static int rt6_score_route(struct rt6_info *rt, int oif,
293 int strict)
1da177e4 294{
554cfb7e
YH
295 int m = rt6_check_dev(rt, oif);
296 if (!m && (strict & RT6_SELECT_F_IFACE))
297 return -1;
ebacaaa0
YH
298#ifdef CONFIG_IPV6_ROUTER_PREF
299 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
300#endif
554cfb7e 301 if (rt6_check_neigh(rt))
ebacaaa0 302 m |= 16;
554cfb7e
YH
303 else if (strict & RT6_SELECT_F_REACHABLE)
304 return -1;
305 return m;
306}
307
/*
 * Choose the best route among the entries at the front of the list
 * *head that share the metric of the first entry.
 *
 * Expired entries are skipped.  Each remaining entry is scored with
 * rt6_score_route(); the highest non-negative score wins.  Entries that
 * lose (and a previous best that gets displaced) are handed to
 * rt6_probe().
 *
 * If nothing matched, RT6_SELECT_F_REACHABLE was requested and more
 * than one candidate was seen, the first entry is moved behind the last
 * candidate so that a later call starts from a different entry.
 *
 * Returns the chosen route, or &ip6_null_entry if none qualified.
 * *head is dereferenced unconditionally, so head and *head must be
 * non-NULL.
 */
308static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
309 int strict)
310{
311 struct rt6_info *match = NULL, *last = NULL;
312 struct rt6_info *rt, *rt0 = *head;
313 u32 metric;
314 int mpri = -1;
1da177e4 315
554cfb7e
YH
316 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
317 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 318
/* Walk only the leading run of entries with the same metric as rt0. */
554cfb7e 319 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 320 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
321 rt = rt->u.next) {
322 int m;
1da177e4 323
554cfb7e 324 if (rt6_check_expired(rt))
1da177e4
LT
325 continue;
326
/* Remember the last non-expired candidate for the rotation below. */
554cfb7e
YH
327 last = rt;
328
329 m = rt6_score_route(rt, oif, strict);
330 if (m < 0)
1da177e4 331 continue;
1da177e4 332
554cfb7e 333 if (m > mpri) {
/* New best: probe the displaced one (rt6_probe() accepts NULL). */
27097255 334 rt6_probe(match);
554cfb7e 335 match = rt;
1da177e4 336 mpri = m;
27097255
YH
337 } else {
338 rt6_probe(rt);
1da177e4
LT
339 }
340 }
341
554cfb7e
YH
342 if (!match &&
343 (strict & RT6_SELECT_F_REACHABLE) &&
344 last && last != rt0) {
345 /* no entries matched; do round-robin */
/* NOTE(review): callers in this file hold rt6_lock only for reading
 * here; the static spinlock serialises the list rotation among them. */
c302e6d5
YH
346 static spinlock_t lock = SPIN_LOCK_UNLOCKED;
347 spin_lock(&lock);
554cfb7e
YH
348 *head = rt0->u.next;
349 rt0->u.next = last->u.next;
350 last->u.next = rt0;
c302e6d5 351 spin_unlock(&lock);
1da177e4 352 }
1da177e4 353
554cfb7e
YH
354 RT6_TRACE("%s() => %p, score=%d\n",
355 __FUNCTION__, match, mpri);
1da177e4 356
554cfb7e 357 return (match ? match : &ip6_null_entry);
1da177e4
LT
358}
359
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route information option.
 *
 * @dev:    interface the option applies to (its ifindex keys the route)
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address for the route
 *
 * The option is validated (length field at most 3, prefix_len at most
 * 128 and covered by the length field).  A matching existing route is
 * deleted when the lifetime is 0; otherwise the route is created or its
 * preference bits are refreshed, and its expiry is set from the
 * lifetime (0xffffffff means "never expires").
 *
 * Returns 0 on success and -EINVAL for a malformed option.  A failure
 * to create the route is not reported.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime is read from the wire: network to host order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Lifetime 0 withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
437
1da177e4
LT
438struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
439 int oif, int strict)
440{
441 struct fib6_node *fn;
442 struct rt6_info *rt;
443
444 read_lock_bh(&rt6_lock);
445 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
446 rt = rt6_device_match(fn->leaf, oif, strict);
447 dst_hold(&rt->u.dst);
448 rt->u.dst.__use++;
449 read_unlock_bh(&rt6_lock);
450
451 rt->u.dst.lastuse = jiffies;
452 if (rt->u.dst.error == 0)
453 return rt;
454 dst_release(&rt->u.dst);
455 return NULL;
456}
457
458/* ip6_ins_rt is called with rt6_lock NOT held.
459 It takes a new route entry; if the addition fails for any reason, the
460 route is freed. In any case, if the caller does not hold it, it may
461 be destroyed.
462 */
463
0d51aa80
JHS
464int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
465 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
466{
467 int err;
468
469 write_lock_bh(&rt6_lock);
0d51aa80 470 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
1da177e4
LT
471 write_unlock_bh(&rt6_lock);
472
473 return err;
474}
475
95a9a5ba
YH
476static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
477 struct in6_addr *saddr)
1da177e4 478{
1da177e4
LT
479 struct rt6_info *rt;
480
481 /*
482 * Clone the route.
483 */
484
485 rt = ip6_rt_copy(ort);
486
487 if (rt) {
58c4fb86
YH
488 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
489 if (rt->rt6i_dst.plen != 128 &&
490 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
491 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 492 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 493 }
1da177e4 494
58c4fb86 495 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
496 rt->rt6i_dst.plen = 128;
497 rt->rt6i_flags |= RTF_CACHE;
498 rt->u.dst.flags |= DST_HOST;
499
500#ifdef CONFIG_IPV6_SUBTREES
501 if (rt->rt6i_src.plen && saddr) {
502 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
503 rt->rt6i_src.plen = 128;
504 }
505#endif
506
507 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
508
95a9a5ba 509 }
1da177e4 510
95a9a5ba
YH
511 return rt;
512}
1da177e4 513
299d9939
YH
514static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
515{
516 struct rt6_info *rt = ip6_rt_copy(ort);
517 if (rt) {
518 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
519 rt->rt6i_dst.plen = 128;
520 rt->rt6i_flags |= RTF_CACHE;
521 if (rt->rt6i_flags & RTF_REJECT)
522 rt->u.dst.error = ort->u.dst.error;
523 rt->u.dst.flags |= DST_HOST;
524 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
525 }
526 return rt;
527}
528
/*
 * If the current selection "rt" is ip6_null_entry, climb from "fn"
 * towards the root of the tree: jump to "restart" at the first ancestor
 * that carries route information, or to "out" when a root node is
 * reached.  If the parent chain ends without either, fall through.
 *
 * Relies on the locals "rt" and "fn" and the labels "out" and "restart"
 * of the calling function.  Wrapped in do { } while (0) so that
 * "BACKTRACK();" is a single statement and safe inside if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
539
540
/*
 * Find the route for a received packet and store it in skb->dst with a
 * reference held.
 *
 * The packet's destination/source addresses are looked up in
 * ip6_routing_table under the rt6_lock read lock, with the receiving
 * device's ifindex as oif.  Multicast and link-local destinations force
 * an interface match (RT6_SELECT_F_IFACE).  The first pass also demands
 * a reachable nexthop (RT6_SELECT_F_REACHABLE); see the "out" label for
 * the second pass.
 *
 * A selected route that is neither ip6_null_entry nor a cache entry and
 * has no nexthop (and is not RTF_NONEXTHOP) is copied with
 * rt6_alloc_cow() and inserted; if the insert fails the lookup is
 * retried, at most "attempts" times in total.
 */
541void ip6_route_input(struct sk_buff *skb)
542{
543 struct fib6_node *fn;
519fbd87 544 struct rt6_info *rt, *nrt;
1da177e4
LT
545 int strict;
546 int attempts = 3;
519fbd87 547 int err;
8238dd06 548 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 549
118f8c16 550 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
551
552relookup:
553 read_lock_bh(&rt6_lock);
554
8238dd06 555restart_2:
1da177e4
LT
556 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
557 &skb->nh.ipv6h->saddr);
558
559restart:
8238dd06 560 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
/* May jump to "restart" (ancestor node) or "out" (hit a root). */
1da177e4 561 BACKTRACK();
8238dd06
YH
562 if (rt == &ip6_null_entry ||
563 rt->rt6i_flags & RTF_CACHE)
1ddef044 564 goto out;
1da177e4 565
/* Pin the route before dropping the lock; the copy is made unlocked. */
fb9de91e
YH
566 dst_hold(&rt->u.dst);
567 read_unlock_bh(&rt6_lock);
568
519fbd87
YH
569 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
570 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
571 else {
572#if CLONE_OFFLINK_ROUTE
573 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
574#else
/* Cloning disabled: use the route itself, reference already held. */
575 goto out2;
576#endif
577 }
e40cf353 578
/* Swap the reference from the original to the copy (or null entry). */
519fbd87
YH
579 dst_release(&rt->u.dst);
580 rt = nrt ? : &ip6_null_entry;
1da177e4 581
519fbd87
YH
582 dst_hold(&rt->u.dst);
583 if (nrt) {
584 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
585 if (!err)
1da177e4 586 goto out2;
1da177e4 587 }
1da177e4 588
519fbd87
YH
589 if (--attempts <= 0)
590 goto out2;
591
592 /*
593 * Race condition! In the gap, when rt6_lock was
594 * released someone could insert this route. Relookup.
595 */
596 dst_release(&rt->u.dst);
597 goto relookup;
598
599out:
/*
 * Reached with rt6_lock still held.
 * NOTE(review): this label is also reached when the first pass found a
 * usable RTF_CACHE route; the lookup is then repeated without
 * RT6_SELECT_F_REACHABLE all the same - confirm that this is intended.
 */
8238dd06
YH
600 if (reachable) {
601 reachable = 0;
602 goto restart_2;
603 }
519fbd87
YH
604 dst_hold(&rt->u.dst);
605 read_unlock_bh(&rt6_lock);
1da177e4
LT
606out2:
/* rt carries a reference here; it is handed over to skb->dst. */
607 rt->u.dst.lastuse = jiffies;
608 rt->u.dst.__use++;
609 skb->dst = (struct dst_entry *) rt;
fb9de91e 610 return;
1da177e4
LT
611}
612
/*
 * Find the route for an outgoing flow and return its dst entry with a
 * reference held.  @sk is not used in this function.
 *
 * fl->fl6_dst/fl->fl6_src are looked up in ip6_routing_table under the
 * rt6_lock read lock with fl->oif as the interface constraint.
 * Multicast and link-local destinations force an interface match
 * (RT6_SELECT_F_IFACE).  The first pass also demands a reachable
 * nexthop (RT6_SELECT_F_REACHABLE); see the "out" label for the second
 * pass.
 *
 * A selected route that is neither ip6_null_entry nor a cache entry and
 * has no nexthop (and is not RTF_NONEXTHOP) is copied with
 * rt6_alloc_cow() and inserted; if the insert fails the lookup is
 * retried, at most "attempts" times in total.  The result may be
 * &ip6_null_entry.u.dst; it is never NULL.
 */
613struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
614{
615 struct fib6_node *fn;
519fbd87 616 struct rt6_info *rt, *nrt;
1da177e4
LT
617 int strict;
618 int attempts = 3;
519fbd87 619 int err;
8238dd06 620 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 621
554cfb7e 622 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
623
624relookup:
625 read_lock_bh(&rt6_lock);
626
8238dd06 627restart_2:
1da177e4
LT
628 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
629
630restart:
8238dd06 631 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
/* May jump to "restart" (ancestor node) or "out" (hit a root). */
1ddef044 632 BACKTRACK();
8238dd06
YH
633 if (rt == &ip6_null_entry ||
634 rt->rt6i_flags & RTF_CACHE)
1da177e4 635 goto out;
1da177e4 636
/* Pin the route before dropping the lock; the copy is made unlocked. */
fb9de91e
YH
637 dst_hold(&rt->u.dst);
638 read_unlock_bh(&rt6_lock);
639
519fbd87 640 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 641 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
642 else {
643#if CLONE_OFFLINK_ROUTE
644 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
645#else
/* Cloning disabled: use the route itself, reference already held. */
646 goto out2;
647#endif
648 }
1da177e4 649
/* Swap the reference from the original to the copy (or null entry). */
519fbd87
YH
650 dst_release(&rt->u.dst);
651 rt = nrt ? : &ip6_null_entry;
1da177e4 652
519fbd87
YH
653 dst_hold(&rt->u.dst);
654 if (nrt) {
655 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
656 if (!err)
1da177e4 657 goto out2;
1da177e4 658 }
e40cf353 659
519fbd87
YH
660 if (--attempts <= 0)
661 goto out2;
662
663 /*
664 * Race condition! In the gap, when rt6_lock was
665 * released someone could insert this route. Relookup.
666 */
667 dst_release(&rt->u.dst);
668 goto relookup;
669
670out:
/*
 * Reached with rt6_lock still held.
 * NOTE(review): this label is also reached when the first pass found a
 * usable RTF_CACHE route; the lookup is then repeated without
 * RT6_SELECT_F_REACHABLE all the same - confirm that this is intended.
 */
8238dd06
YH
671 if (reachable) {
672 reachable = 0;
673 goto restart_2;
674 }
519fbd87
YH
675 dst_hold(&rt->u.dst);
676 read_unlock_bh(&rt6_lock);
1da177e4
LT
677out2:
/* rt carries a reference here; it is passed on to the caller. */
678 rt->u.dst.lastuse = jiffies;
679 rt->u.dst.__use++;
680 return &rt->u.dst;
681}
682
683
684/*
685 * Destination cache support functions
686 */
687
688static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
689{
690 struct rt6_info *rt;
691
692 rt = (struct rt6_info *) dst;
693
694 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
695 return dst;
696
697 return NULL;
698}
699
700static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
701{
702 struct rt6_info *rt = (struct rt6_info *) dst;
703
704 if (rt) {
705 if (rt->rt6i_flags & RTF_CACHE)
0d51aa80 706 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
707 else
708 dst_release(dst);
709 }
710 return NULL;
711}
712
713static void ip6_link_failure(struct sk_buff *skb)
714{
715 struct rt6_info *rt;
716
717 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
718
719 rt = (struct rt6_info *) skb->dst;
720 if (rt) {
721 if (rt->rt6i_flags&RTF_CACHE) {
722 dst_set_expires(&rt->u.dst, 0);
723 rt->rt6i_flags |= RTF_EXPIRES;
724 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
725 rt->rt6i_node->fn_sernum = -1;
726 }
727}
728
729static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
730{
731 struct rt6_info *rt6 = (struct rt6_info*)dst;
732
733 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
734 rt6->rt6i_flags |= RTF_MODIFIED;
735 if (mtu < IPV6_MIN_MTU) {
736 mtu = IPV6_MIN_MTU;
737 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
738 }
739 dst->metrics[RTAX_MTU-1] = mtu;
740 }
741}
742
743/* Protected by rt6_lock. */
744static struct dst_entry *ndisc_dst_gc_list;
745static int ipv6_get_mtu(struct net_device *dev);
746
747static inline unsigned int ipv6_advmss(unsigned int mtu)
748{
749 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
750
751 if (mtu < ip6_rt_min_advmss)
752 mtu = ip6_rt_min_advmss;
753
754 /*
755 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
756 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
757 * IPV6_MAXPLEN is also valid and means: "any MSS,
758 * rely only on pmtu discovery"
759 */
760 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
761 mtu = IPV6_MAXPLEN;
762 return mtu;
763}
764
765struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
766 struct neighbour *neigh,
767 struct in6_addr *addr,
768 int (*output)(struct sk_buff *))
769{
770 struct rt6_info *rt;
771 struct inet6_dev *idev = in6_dev_get(dev);
772
773 if (unlikely(idev == NULL))
774 return NULL;
775
776 rt = ip6_dst_alloc();
777 if (unlikely(rt == NULL)) {
778 in6_dev_put(idev);
779 goto out;
780 }
781
782 dev_hold(dev);
783 if (neigh)
784 neigh_hold(neigh);
785 else
786 neigh = ndisc_get_neigh(dev, addr);
787
788 rt->rt6i_dev = dev;
789 rt->rt6i_idev = idev;
790 rt->rt6i_nexthop = neigh;
791 atomic_set(&rt->u.dst.__refcnt, 1);
792 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
793 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
794 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
795 rt->u.dst.output = output;
796
797#if 0 /* there's no chance to use these for ndisc */
798 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
799 ? DST_HOST
800 : 0;
801 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
802 rt->rt6i_dst.plen = 128;
803#endif
804
805 write_lock_bh(&rt6_lock);
806 rt->u.dst.next = ndisc_dst_gc_list;
807 ndisc_dst_gc_list = &rt->u.dst;
808 write_unlock_bh(&rt6_lock);
809
810 fib6_force_start_gc();
811
812out:
813 return (struct dst_entry *)rt;
814}
815
816int ndisc_dst_gc(int *more)
817{
818 struct dst_entry *dst, *next, **pprev;
819 int freed;
820
821 next = NULL;
822 pprev = &ndisc_dst_gc_list;
823 freed = 0;
824 while ((dst = *pprev) != NULL) {
825 if (!atomic_read(&dst->__refcnt)) {
826 *pprev = dst->next;
827 dst_free(dst);
828 freed++;
829 } else {
830 pprev = &dst->next;
831 (*more)++;
832 }
833 }
834
835 return freed;
836}
837
838static int ip6_dst_gc(void)
839{
840 static unsigned expire = 30*HZ;
841 static unsigned long last_gc;
842 unsigned long now = jiffies;
843
844 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
845 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
846 goto out;
847
848 expire++;
849 fib6_run_gc(expire);
850 last_gc = now;
851 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
852 expire = ip6_rt_gc_timeout>>1;
853
854out:
855 expire -= expire>>ip6_rt_gc_elasticity;
856 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
857}
858
859/* Clean host part of a prefix. Not necessary in radix tree,
860 but results in cleaner routing tables.
861
862 Remove it only when all the things will work!
863 */
864
865static int ipv6_get_mtu(struct net_device *dev)
866{
867 int mtu = IPV6_MIN_MTU;
868 struct inet6_dev *idev;
869
870 idev = in6_dev_get(dev);
871 if (idev) {
872 mtu = idev->cnf.mtu6;
873 in6_dev_put(idev);
874 }
875 return mtu;
876}
877
878int ipv6_get_hoplimit(struct net_device *dev)
879{
880 int hoplimit = ipv6_devconf.hop_limit;
881 struct inet6_dev *idev;
882
883 idev = in6_dev_get(dev);
884 if (idev) {
885 hoplimit = idev->cnf.hop_limit;
886 in6_dev_put(idev);
887 }
888 return hoplimit;
889}
890
891/*
892 *
893 */
894
0d51aa80
JHS
/*
 * Build a route from @rtmsg and insert it into the routing table.
 *
 * @rtmsg:   route description (prefix, gateway, ifindex, flags, metric).
 *           A zero rtmsg_metric is overwritten with IP6_RT_PRIO_USER.
 * @nlh:     optional netlink header; if present its rtm_protocol becomes
 *           the route's protocol, otherwise RTPROT_BOOT is used.
 * @_rtattr: optional rtattr pointer array; RTA_METRICS entries are
 *           copied into the dst metrics.
 * @req:     passed through to ip6_ins_rt().
 *
 * Returns the result of ip6_ins_rt() on the success path.  On failure
 * the device/inet6_dev references and the half-built route are released
 * at "out" and a negative errno is returned: -EINVAL (bad prefix
 * length, bad gateway, bad metric type), -ENODEV (no usable device),
 * -ENOMEM (allocation), -EHOSTUNREACH (gateway not reachable through a
 * non-gateway route), or the error from the neighbour lookup.
 */
0d51aa80
JHS
895int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
896 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
897{
898 int err;
899 struct rtmsg *r;
900 struct rtattr **rta;
901 struct rt6_info *rt = NULL;
902 struct net_device *dev = NULL;
903 struct inet6_dev *idev = NULL;
904 int addr_type;
905
906 rta = (struct rtattr **) _rtattr;
907
908 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
909 return -EINVAL;
910#ifndef CONFIG_IPV6_SUBTREES
911 if (rtmsg->rtmsg_src_len)
912 return -EINVAL;
913#endif
/* An explicit ifindex pins dev and idev; both references are held. */
914 if (rtmsg->rtmsg_ifindex) {
915 err = -ENODEV;
916 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
917 if (!dev)
918 goto out;
919 idev = in6_dev_get(dev);
920 if (!idev)
921 goto out;
922 }
923
924 if (rtmsg->rtmsg_metric == 0)
925 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
926
927 rt = ip6_dst_alloc();
928
929 if (rt == NULL) {
930 err = -ENOMEM;
931 goto out;
932 }
933
934 rt->u.dst.obsolete = -1;
3dd4bc68 935 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1da177e4
LT
936 if (nlh && (r = NLMSG_DATA(nlh))) {
937 rt->rt6i_protocol = r->rtm_protocol;
938 } else {
939 rt->rt6i_protocol = RTPROT_BOOT;
940 }
941
942 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
943
944 if (addr_type & IPV6_ADDR_MULTICAST)
945 rt->u.dst.input = ip6_mc_input;
946 else
947 rt->u.dst.input = ip6_forward;
948
949 rt->u.dst.output = ip6_output;
950
951 ipv6_addr_prefix(&rt->rt6i_dst.addr,
952 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
953 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
954 if (rt->rt6i_dst.plen == 128)
955 rt->u.dst.flags = DST_HOST;
956
957#ifdef CONFIG_IPV6_SUBTREES
958 ipv6_addr_prefix(&rt->rt6i_src.addr,
959 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
960 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
961#endif
962
963 rt->rt6i_metric = rtmsg->rtmsg_metric;
964
965 /* We cannot add true routes via loopback here,
966 they would result in kernel looping; promote them to reject routes
967 */
968 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
969 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
970 /* hold loopback dev/idev if we haven't done so. */
971 if (dev != &loopback_dev) {
972 if (dev) {
973 dev_put(dev);
974 in6_dev_put(idev);
975 }
976 dev = &loopback_dev;
977 dev_hold(dev);
978 idev = in6_dev_get(dev);
979 if (!idev) {
980 err = -ENODEV;
981 goto out;
982 }
983 }
984 rt->u.dst.output = ip6_pkt_discard_out;
985 rt->u.dst.input = ip6_pkt_discard;
986 rt->u.dst.error = -ENETUNREACH;
987 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
988 goto install_route;
989 }
990
991 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
992 struct in6_addr *gw_addr;
993 int gwa_type;
994
995 gw_addr = &rtmsg->rtmsg_gateway;
996 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
997 gwa_type = ipv6_addr_type(gw_addr);
998
999 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1000 struct rt6_info *grt;
1001
1002 /* IPv6 strictly inhibits using not link-local
1003 addresses as nexthop address.
1004 Otherwise, router will not able to send redirects.
1005 It is very good, but in some (rare!) circumstances
1006 (SIT, PtP, NBMA NOARP links) it is handy to allow
1007 some exceptions. --ANK
1008 */
1009 err = -EINVAL;
1010 if (!(gwa_type&IPV6_ADDR_UNICAST))
1011 goto out;
1012
/* The gateway itself must be reachable via an existing route. */
1013 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1014
1015 err = -EHOSTUNREACH;
1016 if (grt == NULL)
1017 goto out;
1018 if (dev) {
1019 if (dev != grt->rt6i_dev) {
1020 dst_release(&grt->u.dst);
1021 goto out;
1022 }
1023 } else {
/* No device given: inherit dev/idev from the gateway's route. */
1024 dev = grt->rt6i_dev;
1025 idev = grt->rt6i_idev;
1026 dev_hold(dev);
1027 in6_dev_hold(grt->rt6i_idev);
1028 }
/* err stays -EHOSTUNREACH if the gateway's route is itself a gateway route. */
1029 if (!(grt->rt6i_flags&RTF_GATEWAY))
1030 err = 0;
1031 dst_release(&grt->u.dst);
1032
1033 if (err)
1034 goto out;
1035 }
1036 err = -EINVAL;
1037 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1038 goto out;
1039 }
1040
1041 err = -ENODEV;
1042 if (dev == NULL)
1043 goto out;
1044
1045 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1046 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1047 if (IS_ERR(rt->rt6i_nexthop)) {
1048 err = PTR_ERR(rt->rt6i_nexthop);
1049 rt->rt6i_nexthop = NULL;
1050 goto out;
1051 }
1052 }
1053
1054 rt->rt6i_flags = rtmsg->rtmsg_flags;
1055
1056install_route:
1057 if (rta && rta[RTA_METRICS-1]) {
1058 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1059 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1060
1061 while (RTA_OK(attr, attrlen)) {
1062 unsigned flavor = attr->rta_type;
1063 if (flavor) {
1064 if (flavor > RTAX_MAX) {
1065 err = -EINVAL;
1066 goto out;
1067 }
/* NOTE(review): the payload is read as a u32 without checking
 * RTA_PAYLOAD(attr) here - confirm it is validated by the caller. */
1068 rt->u.dst.metrics[flavor-1] =
1069 *(u32 *)RTA_DATA(attr);
1070 }
1071 attr = RTA_NEXT(attr, attrlen);
1072 }
1073 }
1074
/* Fill in defaults for metrics that were not supplied. */
1075 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1076 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1077 if (!rt->u.dst.metrics[RTAX_MTU-1])
1078 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1079 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1080 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
/* The dev and idev references are handed over to the route. */
1081 rt->u.dst.dev = dev;
1082 rt->rt6i_idev = idev;
0d51aa80 1083 return ip6_ins_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1084
1085out:
1086 if (dev)
1087 dev_put(dev);
1088 if (idev)
1089 in6_dev_put(idev);
1090 if (rt)
1091 dst_free((struct dst_entry *) rt);
1092 return err;
1093}
1094
0d51aa80 1095int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1096{
1097 int err;
1098
1099 write_lock_bh(&rt6_lock);
1100
0d51aa80 1101 err = fib6_del(rt, nlh, _rtattr, req);
1da177e4
LT
1102 dst_release(&rt->u.dst);
1103
1104 write_unlock_bh(&rt6_lock);
1105
1106 return err;
1107}
1108
0d51aa80 1109static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1110{
1111 struct fib6_node *fn;
1112 struct rt6_info *rt;
1113 int err = -ESRCH;
1114
1115 read_lock_bh(&rt6_lock);
1116
1117 fn = fib6_locate(&ip6_routing_table,
1118 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1119 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1120
1121 if (fn) {
1122 for (rt = fn->leaf; rt; rt = rt->u.next) {
1123 if (rtmsg->rtmsg_ifindex &&
1124 (rt->rt6i_dev == NULL ||
1125 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1126 continue;
1127 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1128 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1129 continue;
1130 if (rtmsg->rtmsg_metric &&
1131 rtmsg->rtmsg_metric != rt->rt6i_metric)
1132 continue;
1133 dst_hold(&rt->u.dst);
1134 read_unlock_bh(&rt6_lock);
1135
0d51aa80 1136 return ip6_del_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1137 }
1138 }
1139 read_unlock_bh(&rt6_lock);
1140
1141 return err;
1142}
1143
1144/*
1145 * Handle redirects
1146 */
1147void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1148 struct neighbour *neigh, u8 *lladdr, int on_link)
1149{
e843b9e1
YH
1150 struct rt6_info *rt, *nrt = NULL;
1151 int strict;
1152 struct fib6_node *fn;
1da177e4
LT
1153
1154 /*
e843b9e1
YH
1155 * Get the "current" route for this destination and
1156 * check if the redirect has come from an appropriate router.
1157 *
1158 * RFC 2461 specifies that redirects should only be
1159 * accepted if they come from the nexthop to the target.
1160 * Due to the way the routes are chosen, this notion
1161 * is a bit fuzzy and one might need to check all possible
1162 * routes.
1da177e4 1163 */
e843b9e1 1164 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
1da177e4 1165
e843b9e1
YH
1166 read_lock_bh(&rt6_lock);
1167 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1168restart:
1169 for (rt = fn->leaf; rt; rt = rt->u.next) {
1170 /*
1171 * Current route is on-link; redirect is always invalid.
1172 *
1173 * Seems, previous statement is not true. It could
1174 * be node, which looks for us as on-link (f.e. proxy ndisc)
1175 * But then router serving it might decide, that we should
1176 * know truth 8)8) --ANK (980726).
1177 */
1178 if (rt6_check_expired(rt))
1179 continue;
1180 if (!(rt->rt6i_flags & RTF_GATEWAY))
1181 continue;
1182 if (neigh->dev != rt->rt6i_dev)
1183 continue;
1184 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1185 continue;
1186 break;
1187 }
1188 if (rt)
1189 dst_hold(&rt->u.dst);
1190 else if (strict) {
1191 while ((fn = fn->parent) != NULL) {
1192 if (fn->fn_flags & RTN_ROOT)
1193 break;
1194 if (fn->fn_flags & RTN_RTINFO)
1195 goto restart;
1da177e4 1196 }
e843b9e1
YH
1197 }
1198 read_unlock_bh(&rt6_lock);
1199
1200 if (!rt) {
1da177e4
LT
1201 if (net_ratelimit())
1202 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1203 "for redirect target\n");
e843b9e1 1204 return;
1da177e4
LT
1205 }
1206
1da177e4
LT
1207 /*
1208 * We have finally decided to accept it.
1209 */
1210
1211 neigh_update(neigh, lladdr, NUD_STALE,
1212 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1213 NEIGH_UPDATE_F_OVERRIDE|
1214 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1215 NEIGH_UPDATE_F_ISROUTER))
1216 );
1217
1218 /*
1219 * Redirect received -> path was valid.
1220 * Look, redirects are sent only in response to data packets,
1221 * so that this nexthop apparently is reachable. --ANK
1222 */
1223 dst_confirm(&rt->u.dst);
1224
1225 /* Duplicate redirect: silently ignore. */
1226 if (neigh == rt->u.dst.neighbour)
1227 goto out;
1228
1229 nrt = ip6_rt_copy(rt);
1230 if (nrt == NULL)
1231 goto out;
1232
1233 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1234 if (on_link)
1235 nrt->rt6i_flags &= ~RTF_GATEWAY;
1236
1237 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1238 nrt->rt6i_dst.plen = 128;
1239 nrt->u.dst.flags |= DST_HOST;
1240
1241 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1242 nrt->rt6i_nexthop = neigh_clone(neigh);
1243 /* Reset pmtu, it may be better */
1244 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1245 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1246
0d51aa80 1247 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1da177e4
LT
1248 goto out;
1249
1250 if (rt->rt6i_flags&RTF_CACHE) {
0d51aa80 1251 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1252 return;
1253 }
1254
1255out:
1256 dst_release(&rt->u.dst);
1257 return;
1258}
1259
1260/*
1261 * Handle ICMP "packet too big" messages
1262 * i.e. Path MTU discovery
1263 */
1264
1265void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1266 struct net_device *dev, u32 pmtu)
1267{
1268 struct rt6_info *rt, *nrt;
1269 int allfrag = 0;
1270
1271 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1272 if (rt == NULL)
1273 return;
1274
1275 if (pmtu >= dst_mtu(&rt->u.dst))
1276 goto out;
1277
1278 if (pmtu < IPV6_MIN_MTU) {
1279 /*
1280 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1281 * MTU (1280) and a fragment header should always be included
1282 * after a node receiving Too Big message reporting PMTU is
1283 * less than the IPv6 Minimum Link MTU.
1284 */
1285 pmtu = IPV6_MIN_MTU;
1286 allfrag = 1;
1287 }
1288
1289 /* New mtu received -> path was valid.
1290 They are sent only in response to data packets,
1291 so that this nexthop apparently is reachable. --ANK
1292 */
1293 dst_confirm(&rt->u.dst);
1294
1295 /* Host route. If it is static, it would be better
1296 not to override it, but add new one, so that
1297 when cache entry will expire old pmtu
1298 would return automatically.
1299 */
1300 if (rt->rt6i_flags & RTF_CACHE) {
1301 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1302 if (allfrag)
1303 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1304 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1305 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1306 goto out;
1307 }
1308
1309 /* Network route.
1310 Two cases are possible:
1311 1. It is connected route. Action: COW
1312 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1313 */
d5315b50 1314 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1315 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1316 else
1317 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1318
d5315b50 1319 if (nrt) {
a1e78363
YH
1320 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1321 if (allfrag)
1322 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1323
1324 /* According to RFC 1981, detecting PMTU increase shouldn't be
1325 * happened within 5 mins, the recommended timer is 10 mins.
1326 * Here this route expiration time is set to ip6_rt_mtu_expires
1327 * which is 10 mins. After 10 mins the decreased pmtu is expired
1328 * and detecting PMTU increase will be automatically happened.
1329 */
1330 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1331 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1332
1333 ip6_ins_rt(nrt, NULL, NULL, NULL);
1da177e4 1334 }
1da177e4
LT
1335out:
1336 dst_release(&rt->u.dst);
1337}
1338
1339/*
1340 * Misc support functions
1341 */
1342
1343static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1344{
1345 struct rt6_info *rt = ip6_dst_alloc();
1346
1347 if (rt) {
1348 rt->u.dst.input = ort->u.dst.input;
1349 rt->u.dst.output = ort->u.dst.output;
1350
1351 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1352 rt->u.dst.dev = ort->u.dst.dev;
1353 if (rt->u.dst.dev)
1354 dev_hold(rt->u.dst.dev);
1355 rt->rt6i_idev = ort->rt6i_idev;
1356 if (rt->rt6i_idev)
1357 in6_dev_hold(rt->rt6i_idev);
1358 rt->u.dst.lastuse = jiffies;
1359 rt->rt6i_expires = 0;
1360
1361 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1362 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1363 rt->rt6i_metric = 0;
1364
1365 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1366#ifdef CONFIG_IPV6_SUBTREES
1367 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1368#endif
1369 }
1370 return rt;
1371}
1372
70ceb4f5
YH
1373#ifdef CONFIG_IPV6_ROUTE_INFO
1374static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1375 struct in6_addr *gwaddr, int ifindex)
1376{
1377 struct fib6_node *fn;
1378 struct rt6_info *rt = NULL;
1379
1380 write_lock_bh(&rt6_lock);
1381 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1382 if (!fn)
1383 goto out;
1384
1385 for (rt = fn->leaf; rt; rt = rt->u.next) {
1386 if (rt->rt6i_dev->ifindex != ifindex)
1387 continue;
1388 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1389 continue;
1390 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1391 continue;
1392 dst_hold(&rt->u.dst);
1393 break;
1394 }
1395out:
1396 write_unlock_bh(&rt6_lock);
1397 return rt;
1398}
1399
1400static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1401 struct in6_addr *gwaddr, int ifindex,
1402 unsigned pref)
1403{
1404 struct in6_rtmsg rtmsg;
1405
1406 memset(&rtmsg, 0, sizeof(rtmsg));
1407 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1408 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1409 rtmsg.rtmsg_dst_len = prefixlen;
1410 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1411 rtmsg.rtmsg_metric = 1024;
1412 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
e317da96
YH
1413 /* We should treat it as a default route if prefix length is 0. */
1414 if (!prefixlen)
1415 rtmsg.rtmsg_flags |= RTF_DEFAULT;
70ceb4f5
YH
1416 rtmsg.rtmsg_ifindex = ifindex;
1417
1418 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1419
1420 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1421}
1422#endif
1423
1da177e4
LT
1424struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1425{
1426 struct rt6_info *rt;
1427 struct fib6_node *fn;
1428
1429 fn = &ip6_routing_table;
1430
1431 write_lock_bh(&rt6_lock);
1432 for (rt = fn->leaf; rt; rt=rt->u.next) {
1433 if (dev == rt->rt6i_dev &&
045927ff 1434 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1435 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1436 break;
1437 }
1438 if (rt)
1439 dst_hold(&rt->u.dst);
1440 write_unlock_bh(&rt6_lock);
1441 return rt;
1442}
1443
1444struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1445 struct net_device *dev,
1446 unsigned int pref)
1da177e4
LT
1447{
1448 struct in6_rtmsg rtmsg;
1449
1450 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1451 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1452 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1453 rtmsg.rtmsg_metric = 1024;
ebacaaa0
YH
1454 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1455 RTF_PREF(pref);
1da177e4
LT
1456
1457 rtmsg.rtmsg_ifindex = dev->ifindex;
1458
0d51aa80 1459 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1460 return rt6_get_dflt_router(gwaddr, dev);
1461}
1462
1463void rt6_purge_dflt_routers(void)
1464{
1465 struct rt6_info *rt;
1466
1467restart:
1468 read_lock_bh(&rt6_lock);
1469 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1470 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1471 dst_hold(&rt->u.dst);
1472
1da177e4
LT
1473 read_unlock_bh(&rt6_lock);
1474
0d51aa80 1475 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1476
1477 goto restart;
1478 }
1479 }
1480 read_unlock_bh(&rt6_lock);
1481}
1482
1483int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1484{
1485 struct in6_rtmsg rtmsg;
1486 int err;
1487
1488 switch(cmd) {
1489 case SIOCADDRT: /* Add a route */
1490 case SIOCDELRT: /* Delete a route */
1491 if (!capable(CAP_NET_ADMIN))
1492 return -EPERM;
1493 err = copy_from_user(&rtmsg, arg,
1494 sizeof(struct in6_rtmsg));
1495 if (err)
1496 return -EFAULT;
1497
1498 rtnl_lock();
1499 switch (cmd) {
1500 case SIOCADDRT:
0d51aa80 1501 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1502 break;
1503 case SIOCDELRT:
0d51aa80 1504 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1505 break;
1506 default:
1507 err = -EINVAL;
1508 }
1509 rtnl_unlock();
1510
1511 return err;
1512 };
1513
1514 return -EINVAL;
1515}
1516
1517/*
1518 * Drop the packet on the floor
1519 */
1520
20380731 1521static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4
LT
1522{
1523 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1524 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1525 kfree_skb(skb);
1526 return 0;
1527}
1528
20380731 1529static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1530{
1531 skb->dev = skb->dst->dev;
1532 return ip6_pkt_discard(skb);
1533}
1534
1535/*
1536 * Allocate a dst for local (unicast / anycast) address.
1537 */
1538
1539struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1540 const struct in6_addr *addr,
1541 int anycast)
1542{
1543 struct rt6_info *rt = ip6_dst_alloc();
1544
1545 if (rt == NULL)
1546 return ERR_PTR(-ENOMEM);
1547
1548 dev_hold(&loopback_dev);
1549 in6_dev_hold(idev);
1550
1551 rt->u.dst.flags = DST_HOST;
1552 rt->u.dst.input = ip6_input;
1553 rt->u.dst.output = ip6_output;
1554 rt->rt6i_dev = &loopback_dev;
1555 rt->rt6i_idev = idev;
1556 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1557 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1558 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1559 rt->u.dst.obsolete = -1;
1560
1561 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1562 if (anycast)
1563 rt->rt6i_flags |= RTF_ANYCAST;
1564 else
1da177e4
LT
1565 rt->rt6i_flags |= RTF_LOCAL;
1566 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1567 if (rt->rt6i_nexthop == NULL) {
1568 dst_free((struct dst_entry *) rt);
1569 return ERR_PTR(-ENOMEM);
1570 }
1571
1572 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1573 rt->rt6i_dst.plen = 128;
1574
1575 atomic_set(&rt->u.dst.__refcnt, 1);
1576
1577 return rt;
1578}
1579
1580static int fib6_ifdown(struct rt6_info *rt, void *arg)
1581{
1582 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1583 rt != &ip6_null_entry) {
1584 RT6_TRACE("deleted by ifdown %p\n", rt);
1585 return -1;
1586 }
1587 return 0;
1588}
1589
1590void rt6_ifdown(struct net_device *dev)
1591{
1592 write_lock_bh(&rt6_lock);
1593 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1594 write_unlock_bh(&rt6_lock);
1595}
1596
/* Argument handed to rt6_mtu_change_route() for each route in the walk. */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* its new MTU */
};
1602
1603static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1604{
1605 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1606 struct inet6_dev *idev;
1607
1608 /* In IPv6 pmtu discovery is not optional,
1609 so that RTAX_MTU lock cannot disable it.
1610 We still use this lock to block changes
1611 caused by addrconf/ndisc.
1612 */
1613
1614 idev = __in6_dev_get(arg->dev);
1615 if (idev == NULL)
1616 return 0;
1617
1618 /* For administrative MTU increase, there is no way to discover
1619 IPv6 PMTU increase, so PMTU increase should be updated here.
1620 Since RFC 1981 doesn't include administrative MTU increase
1621 update PMTU increase is a MUST. (i.e. jumbo frame)
1622 */
1623 /*
1624 If new MTU is less than route PMTU, this new MTU will be the
1625 lowest MTU in the path, update the route PMTU to reflect PMTU
1626 decreases; if new MTU is greater than route PMTU, and the
1627 old MTU is the lowest MTU in the path, update the route PMTU
1628 to reflect the increase. In this case if the other nodes' MTU
1629 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1630 PMTU discouvery.
1631 */
1632 if (rt->rt6i_dev == arg->dev &&
1633 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1634 (dst_mtu(&rt->u.dst) > arg->mtu ||
1635 (dst_mtu(&rt->u.dst) < arg->mtu &&
1636 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1637 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1638 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1639 return 0;
1640}
1641
1642void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1643{
1644 struct rt6_mtu_change_arg arg;
1645
1646 arg.dev = dev;
1647 arg.mtu = mtu;
1648 read_lock_bh(&rt6_lock);
1649 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1650 read_unlock_bh(&rt6_lock);
1651}
1652
1653static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1654 struct in6_rtmsg *rtmsg)
1655{
1656 memset(rtmsg, 0, sizeof(*rtmsg));
1657
1658 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1659 rtmsg->rtmsg_src_len = r->rtm_src_len;
1660 rtmsg->rtmsg_flags = RTF_UP;
1661 if (r->rtm_type == RTN_UNREACHABLE)
1662 rtmsg->rtmsg_flags |= RTF_REJECT;
1663
1664 if (rta[RTA_GATEWAY-1]) {
1665 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1666 return -EINVAL;
1667 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1668 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1669 }
1670 if (rta[RTA_DST-1]) {
1671 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1672 return -EINVAL;
1673 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1674 }
1675 if (rta[RTA_SRC-1]) {
1676 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1677 return -EINVAL;
1678 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1679 }
1680 if (rta[RTA_OIF-1]) {
1681 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1682 return -EINVAL;
1683 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1684 }
1685 if (rta[RTA_PRIORITY-1]) {
1686 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1687 return -EINVAL;
1688 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1689 }
1690 return 0;
1691}
1692
1693int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1694{
1695 struct rtmsg *r = NLMSG_DATA(nlh);
1696 struct in6_rtmsg rtmsg;
1697
1698 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1699 return -EINVAL;
0d51aa80 1700 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1701}
1702
1703int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1704{
1705 struct rtmsg *r = NLMSG_DATA(nlh);
1706 struct in6_rtmsg rtmsg;
1707
1708 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1709 return -EINVAL;
0d51aa80 1710 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1711}
1712
/* Per-call state handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff		*skb;	/* message buffer being filled */
	struct netlink_callback	*cb;	/* dump request this belongs to */
};
1718
1719static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1720 struct in6_addr *dst, struct in6_addr *src,
1721 int iif, int type, u32 pid, u32 seq,
1722 int prefix, unsigned int flags)
1da177e4
LT
1723{
1724 struct rtmsg *rtm;
1725 struct nlmsghdr *nlh;
1726 unsigned char *b = skb->tail;
1727 struct rta_cacheinfo ci;
1728
1729 if (prefix) { /* user wants prefix routes only */
1730 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1731 /* success since this is not a prefix route */
1732 return 1;
1733 }
1734 }
1735
b6544c0b 1736 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1737 rtm = NLMSG_DATA(nlh);
1738 rtm->rtm_family = AF_INET6;
1739 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1740 rtm->rtm_src_len = rt->rt6i_src.plen;
1741 rtm->rtm_tos = 0;
1742 rtm->rtm_table = RT_TABLE_MAIN;
1743 if (rt->rt6i_flags&RTF_REJECT)
1744 rtm->rtm_type = RTN_UNREACHABLE;
1745 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1746 rtm->rtm_type = RTN_LOCAL;
1747 else
1748 rtm->rtm_type = RTN_UNICAST;
1749 rtm->rtm_flags = 0;
1750 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1751 rtm->rtm_protocol = rt->rt6i_protocol;
1752 if (rt->rt6i_flags&RTF_DYNAMIC)
1753 rtm->rtm_protocol = RTPROT_REDIRECT;
1754 else if (rt->rt6i_flags & RTF_ADDRCONF)
1755 rtm->rtm_protocol = RTPROT_KERNEL;
1756 else if (rt->rt6i_flags&RTF_DEFAULT)
1757 rtm->rtm_protocol = RTPROT_RA;
1758
1759 if (rt->rt6i_flags&RTF_CACHE)
1760 rtm->rtm_flags |= RTM_F_CLONED;
1761
1762 if (dst) {
1763 RTA_PUT(skb, RTA_DST, 16, dst);
1764 rtm->rtm_dst_len = 128;
1765 } else if (rtm->rtm_dst_len)
1766 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1767#ifdef CONFIG_IPV6_SUBTREES
1768 if (src) {
1769 RTA_PUT(skb, RTA_SRC, 16, src);
1770 rtm->rtm_src_len = 128;
1771 } else if (rtm->rtm_src_len)
1772 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1773#endif
1774 if (iif)
1775 RTA_PUT(skb, RTA_IIF, 4, &iif);
1776 else if (dst) {
1777 struct in6_addr saddr_buf;
1778 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1779 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1780 }
1781 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1782 goto rtattr_failure;
1783 if (rt->u.dst.neighbour)
1784 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1785 if (rt->u.dst.dev)
1786 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1787 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1788 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1789 if (rt->rt6i_expires)
1790 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1791 else
1792 ci.rta_expires = 0;
1793 ci.rta_used = rt->u.dst.__use;
1794 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1795 ci.rta_error = rt->u.dst.error;
1796 ci.rta_id = 0;
1797 ci.rta_ts = 0;
1798 ci.rta_tsage = 0;
1799 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1800 nlh->nlmsg_len = skb->tail - b;
1801 return skb->len;
1802
1803nlmsg_failure:
1804rtattr_failure:
1805 skb_trim(skb, b - skb->data);
1806 return -1;
1807}
1808
1809static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1810{
1811 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1812 int prefix;
1813
1814 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1815 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1816 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1817 } else
1818 prefix = 0;
1819
1820 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1821 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 1822 prefix, NLM_F_MULTI);
1da177e4
LT
1823}
1824
1825static int fib6_dump_node(struct fib6_walker_t *w)
1826{
1827 int res;
1828 struct rt6_info *rt;
1829
1830 for (rt = w->leaf; rt; rt = rt->u.next) {
1831 res = rt6_dump_route(rt, w->args);
1832 if (res < 0) {
1833 /* Frame is full, suspend walking */
1834 w->leaf = rt;
1835 return 1;
1836 }
1837 BUG_TRAP(res!=0);
1838 }
1839 w->leaf = NULL;
1840 return 0;
1841}
1842
1843static void fib6_dump_end(struct netlink_callback *cb)
1844{
1845 struct fib6_walker_t *w = (void*)cb->args[0];
1846
1847 if (w) {
1848 cb->args[0] = 0;
1849 fib6_walker_unlink(w);
1850 kfree(w);
1851 }
efacfbcb
HX
1852 cb->done = (void*)cb->args[1];
1853 cb->args[1] = 0;
1da177e4
LT
1854}
1855
1856static int fib6_dump_done(struct netlink_callback *cb)
1857{
1858 fib6_dump_end(cb);
a8f74b22 1859 return cb->done ? cb->done(cb) : 0;
1da177e4
LT
1860}
1861
1862int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1863{
1864 struct rt6_rtnl_dump_arg arg;
1865 struct fib6_walker_t *w;
1866 int res;
1867
1868 arg.skb = skb;
1869 arg.cb = cb;
1870
1871 w = (void*)cb->args[0];
1872 if (w == NULL) {
1873 /* New dump:
1874 *
1875 * 1. hook callback destructor.
1876 */
1877 cb->args[1] = (long)cb->done;
1878 cb->done = fib6_dump_done;
1879
1880 /*
1881 * 2. allocate and initialize walker.
1882 */
0c600eda 1883 w = kzalloc(sizeof(*w), GFP_ATOMIC);
1da177e4
LT
1884 if (w == NULL)
1885 return -ENOMEM;
1886 RT6_TRACE("dump<%p", w);
1da177e4
LT
1887 w->root = &ip6_routing_table;
1888 w->func = fib6_dump_node;
1889 w->args = &arg;
1890 cb->args[0] = (long)w;
1891 read_lock_bh(&rt6_lock);
1892 res = fib6_walk(w);
1893 read_unlock_bh(&rt6_lock);
1894 } else {
1895 w->args = &arg;
1896 read_lock_bh(&rt6_lock);
1897 res = fib6_walk_continue(w);
1898 read_unlock_bh(&rt6_lock);
1899 }
1900#if RT6_DEBUG >= 3
1901 if (res <= 0 && skb->len == 0)
1902 RT6_TRACE("%p>dump end\n", w);
1903#endif
1904 res = res < 0 ? res : skb->len;
1905 /* res < 0 is an error. (really, impossible)
1906 res == 0 means that dump is complete, but skb still can contain data.
1907 res > 0 dump is not complete, but frame is full.
1908 */
1909 /* Destroy walker, if dump of this table is complete. */
1910 if (res <= 0)
1911 fib6_dump_end(cb);
1912 return res;
1913}
1914
1915int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1916{
1917 struct rtattr **rta = arg;
1918 int iif = 0;
1919 int err = -ENOBUFS;
1920 struct sk_buff *skb;
1921 struct flowi fl;
1922 struct rt6_info *rt;
1923
1924 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1925 if (skb == NULL)
1926 goto out;
1927
1928 /* Reserve room for dummy headers, this skb can pass
1929 through good chunk of routing engine.
1930 */
1931 skb->mac.raw = skb->data;
1932 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1933
1934 memset(&fl, 0, sizeof(fl));
1935 if (rta[RTA_SRC-1])
1936 ipv6_addr_copy(&fl.fl6_src,
1937 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1938 if (rta[RTA_DST-1])
1939 ipv6_addr_copy(&fl.fl6_dst,
1940 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1941
1942 if (rta[RTA_IIF-1])
1943 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1944
1945 if (iif) {
1946 struct net_device *dev;
1947 dev = __dev_get_by_index(iif);
1948 if (!dev) {
1949 err = -ENODEV;
1950 goto out_free;
1951 }
1952 }
1953
1954 fl.oif = 0;
1955 if (rta[RTA_OIF-1])
1956 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1957
1958 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1959
1960 skb->dst = &rt->u.dst;
1961
1962 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1963 err = rt6_fill_node(skb, rt,
1964 &fl.fl6_dst, &fl.fl6_src,
1965 iif,
1966 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 1967 nlh->nlmsg_seq, 0, 0);
1da177e4
LT
1968 if (err < 0) {
1969 err = -EMSGSIZE;
1970 goto out_free;
1971 }
1972
1973 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1974 if (err > 0)
1975 err = 0;
1976out:
1977 return err;
1978out_free:
1979 kfree_skb(skb);
1980 goto out;
1981}
1982
0d51aa80
JHS
1983void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1984 struct netlink_skb_parms *req)
1da177e4
LT
1985{
1986 struct sk_buff *skb;
1987 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
0d51aa80
JHS
1988 u32 pid = current->pid;
1989 u32 seq = 0;
1da177e4 1990
0d51aa80
JHS
1991 if (req)
1992 pid = req->pid;
1993 if (nlh)
1994 seq = nlh->nlmsg_seq;
1995
1da177e4
LT
1996 skb = alloc_skb(size, gfp_any());
1997 if (!skb) {
ac6d439d 1998 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1da177e4
LT
1999 return;
2000 }
0d51aa80 2001 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1da177e4 2002 kfree_skb(skb);
ac6d439d 2003 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1da177e4
LT
2004 return;
2005 }
ac6d439d
PM
2006 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2007 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1da177e4
LT
2008}
2009
2010/*
2011 * /proc
2012 */
2013
2014#ifdef CONFIG_PROC_FS
2015
2016#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2017
/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char	*buffer;	/* output buffer */
	int	offset;		/* requested read offset */
	int	length;		/* requested read length */
	int	skip;		/* routes skipped so far */
	int	len;		/* bytes written to buffer so far */
};
2026
2027static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2028{
2029 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2030 int i;
2031
2032 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2033 arg->skip++;
2034 return 0;
2035 }
2036
2037 if (arg->len >= arg->length)
2038 return 0;
2039
2040 for (i=0; i<16; i++) {
2041 sprintf(arg->buffer + arg->len, "%02x",
2042 rt->rt6i_dst.addr.s6_addr[i]);
2043 arg->len += 2;
2044 }
2045 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2046 rt->rt6i_dst.plen);
2047
2048#ifdef CONFIG_IPV6_SUBTREES
2049 for (i=0; i<16; i++) {
2050 sprintf(arg->buffer + arg->len, "%02x",
2051 rt->rt6i_src.addr.s6_addr[i]);
2052 arg->len += 2;
2053 }
2054 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2055 rt->rt6i_src.plen);
2056#else
2057 sprintf(arg->buffer + arg->len,
2058 "00000000000000000000000000000000 00 ");
2059 arg->len += 36;
2060#endif
2061
2062 if (rt->rt6i_nexthop) {
2063 for (i=0; i<16; i++) {
2064 sprintf(arg->buffer + arg->len, "%02x",
2065 rt->rt6i_nexthop->primary_key[i]);
2066 arg->len += 2;
2067 }
2068 } else {
2069 sprintf(arg->buffer + arg->len,
2070 "00000000000000000000000000000000");
2071 arg->len += 32;
2072 }
2073 arg->len += sprintf(arg->buffer + arg->len,
2074 " %08x %08x %08x %08x %8s\n",
2075 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2076 rt->u.dst.__use, rt->rt6i_flags,
2077 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2078 return 0;
2079}
2080
2081static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2082{
2083 struct rt6_proc_arg arg;
2084 arg.buffer = buffer;
2085 arg.offset = offset;
2086 arg.length = length;
2087 arg.skip = 0;
2088 arg.len = 0;
2089
2090 read_lock_bh(&rt6_lock);
2091 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2092 read_unlock_bh(&rt6_lock);
2093
2094 *start = buffer;
2095 if (offset)
2096 *start += offset % RT6_INFO_LEN;
2097
2098 arg.len -= offset % RT6_INFO_LEN;
2099
2100 if (arg.len > length)
2101 arg.len = length;
2102 if (arg.len < 0)
2103 arg.len = 0;
2104
2105 return arg.len;
2106}
2107
1da177e4
LT
2108static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2109{
2110 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2111 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2112 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2113 rt6_stats.fib_rt_cache,
2114 atomic_read(&ip6_dst_ops.entries),
2115 rt6_stats.fib_discarded_routes);
2116
2117 return 0;
2118}
2119
2120static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2121{
2122 return single_open(file, rt6_stats_seq_show, NULL);
2123}
2124
2125static struct file_operations rt6_stats_seq_fops = {
2126 .owner = THIS_MODULE,
2127 .open = rt6_stats_seq_open,
2128 .read = seq_read,
2129 .llseek = seq_lseek,
2130 .release = single_release,
2131};
2132#endif /* CONFIG_PROC_FS */
2133
2134#ifdef CONFIG_SYSCTL
2135
2136static int flush_delay;
2137
2138static
2139int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2140 void __user *buffer, size_t *lenp, loff_t *ppos)
2141{
2142 if (write) {
2143 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2144 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2145 return 0;
2146 } else
2147 return -EINVAL;
2148}
2149
2150ctl_table ipv6_route_table[] = {
2151 {
2152 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2153 .procname = "flush",
2154 .data = &flush_delay,
2155 .maxlen = sizeof(int),
89c8b3a1 2156 .mode = 0200,
1da177e4
LT
2157 .proc_handler = &ipv6_sysctl_rtcache_flush
2158 },
2159 {
2160 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2161 .procname = "gc_thresh",
2162 .data = &ip6_dst_ops.gc_thresh,
2163 .maxlen = sizeof(int),
2164 .mode = 0644,
2165 .proc_handler = &proc_dointvec,
2166 },
2167 {
2168 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2169 .procname = "max_size",
2170 .data = &ip6_rt_max_size,
2171 .maxlen = sizeof(int),
2172 .mode = 0644,
2173 .proc_handler = &proc_dointvec,
2174 },
2175 {
2176 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2177 .procname = "gc_min_interval",
2178 .data = &ip6_rt_gc_min_interval,
2179 .maxlen = sizeof(int),
2180 .mode = 0644,
2181 .proc_handler = &proc_dointvec_jiffies,
2182 .strategy = &sysctl_jiffies,
2183 },
2184 {
2185 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2186 .procname = "gc_timeout",
2187 .data = &ip6_rt_gc_timeout,
2188 .maxlen = sizeof(int),
2189 .mode = 0644,
2190 .proc_handler = &proc_dointvec_jiffies,
2191 .strategy = &sysctl_jiffies,
2192 },
2193 {
2194 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2195 .procname = "gc_interval",
2196 .data = &ip6_rt_gc_interval,
2197 .maxlen = sizeof(int),
2198 .mode = 0644,
2199 .proc_handler = &proc_dointvec_jiffies,
2200 .strategy = &sysctl_jiffies,
2201 },
2202 {
2203 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2204 .procname = "gc_elasticity",
2205 .data = &ip6_rt_gc_elasticity,
2206 .maxlen = sizeof(int),
2207 .mode = 0644,
2208 .proc_handler = &proc_dointvec_jiffies,
2209 .strategy = &sysctl_jiffies,
2210 },
2211 {
2212 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2213 .procname = "mtu_expires",
2214 .data = &ip6_rt_mtu_expires,
2215 .maxlen = sizeof(int),
2216 .mode = 0644,
2217 .proc_handler = &proc_dointvec_jiffies,
2218 .strategy = &sysctl_jiffies,
2219 },
2220 {
2221 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2222 .procname = "min_adv_mss",
2223 .data = &ip6_rt_min_advmss,
2224 .maxlen = sizeof(int),
2225 .mode = 0644,
2226 .proc_handler = &proc_dointvec_jiffies,
2227 .strategy = &sysctl_jiffies,
2228 },
2229 {
2230 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2231 .procname = "gc_min_interval_ms",
2232 .data = &ip6_rt_gc_min_interval,
2233 .maxlen = sizeof(int),
2234 .mode = 0644,
2235 .proc_handler = &proc_dointvec_ms_jiffies,
2236 .strategy = &sysctl_ms_jiffies,
2237 },
2238 { .ctl_name = 0 }
2239};
2240
2241#endif
2242
2243void __init ip6_route_init(void)
2244{
2245 struct proc_dir_entry *p;
2246
2247 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2248 sizeof(struct rt6_info),
2249 0, SLAB_HWCACHE_ALIGN,
2250 NULL, NULL);
2251 if (!ip6_dst_ops.kmem_cachep)
2252 panic("cannot create ip6_dst_cache");
2253
2254 fib6_init();
2255#ifdef CONFIG_PROC_FS
2256 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2257 if (p)
2258 p->owner = THIS_MODULE;
2259
2260 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2261#endif
2262#ifdef CONFIG_XFRM
2263 xfrm6_init();
2264#endif
2265}
2266
2267void ip6_route_cleanup(void)
2268{
2269#ifdef CONFIG_PROC_FS
2270 proc_net_remove("ipv6_route");
2271 proc_net_remove("rt6_stats");
2272#endif
2273#ifdef CONFIG_XFRM
2274 xfrm6_fini();
2275#endif
2276 rt6_ifdown(NULL);
2277 fib6_gc_cleanup();
2278 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2279}