]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IPV6]: ROUTE: Add accept_ra_rtr_pref sysctl.
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug level for this file; set to 3 (or higher) to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers compile away. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: RDBG takes a parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
554cfb7e
YH
77#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
1da177e4
LT
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101static struct dst_ops ip6_dst_ops = {
102 .family = AF_INET6,
103 .protocol = __constant_htons(ETH_P_IPV6),
104 .gc = ip6_dst_gc,
105 .gc_thresh = 1024,
106 .check = ip6_dst_check,
107 .destroy = ip6_dst_destroy,
108 .ifdown = ip6_dst_ifdown,
109 .negative_advice = ip6_negative_advice,
110 .link_failure = ip6_link_failure,
111 .update_pmtu = ip6_rt_update_pmtu,
112 .entry_size = sizeof(struct rt6_info),
113};
114
115struct rt6_info ip6_null_entry = {
116 .u = {
117 .dst = {
118 .__refcnt = ATOMIC_INIT(1),
119 .__use = 1,
120 .dev = &loopback_dev,
121 .obsolete = -1,
122 .error = -ENETUNREACH,
123 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
124 .input = ip6_pkt_discard,
125 .output = ip6_pkt_discard_out,
126 .ops = &ip6_dst_ops,
127 .path = (struct dst_entry*)&ip6_null_entry,
128 }
129 },
130 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
131 .rt6i_metric = ~(u32) 0,
132 .rt6i_ref = ATOMIC_INIT(1),
133};
134
135struct fib6_node ip6_routing_table = {
136 .leaf = &ip6_null_entry,
137 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138};
139
140/* Protects all the ip6 fib */
141
142DEFINE_RWLOCK(rt6_lock);
143
144
145/* allocate dst with ip6_dst_ops */
146static __inline__ struct rt6_info *ip6_dst_alloc(void)
147{
148 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149}
150
151static void ip6_dst_destroy(struct dst_entry *dst)
152{
153 struct rt6_info *rt = (struct rt6_info *)dst;
154 struct inet6_dev *idev = rt->rt6i_idev;
155
156 if (idev != NULL) {
157 rt->rt6i_idev = NULL;
158 in6_dev_put(idev);
159 }
160}
161
162static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163 int how)
164{
165 struct rt6_info *rt = (struct rt6_info *)dst;
166 struct inet6_dev *idev = rt->rt6i_idev;
167
168 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170 if (loopback_idev != NULL) {
171 rt->rt6i_idev = loopback_idev;
172 in6_dev_put(idev);
173 }
174 }
175}
176
177static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178{
179 return (rt->rt6i_flags & RTF_EXPIRES &&
180 time_after(jiffies, rt->rt6i_expires));
181}
182
183/*
184 * Route lookup. Any rt6_lock is implied.
185 */
186
187static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188 int oif,
189 int strict)
190{
191 struct rt6_info *local = NULL;
192 struct rt6_info *sprt;
193
194 if (oif) {
195 for (sprt = rt; sprt; sprt = sprt->u.next) {
196 struct net_device *dev = sprt->rt6i_dev;
197 if (dev->ifindex == oif)
198 return sprt;
199 if (dev->flags & IFF_LOOPBACK) {
200 if (sprt->rt6i_idev == NULL ||
201 sprt->rt6i_idev->dev->ifindex != oif) {
202 if (strict && oif)
203 continue;
204 if (local && (!oif ||
205 local->rt6i_idev->dev->ifindex == oif))
206 continue;
207 }
208 local = sprt;
209 }
210 }
211
212 if (local)
213 return local;
214
215 if (strict)
216 return &ip6_null_entry;
217 }
218 return rt;
219}
220
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not in
 * a NUD_VALID state, send it a Neighbour Solicitation (to the
 * solicited-node multicast address of the neighbour's key).
 *
 * Probes MUST be rate-limited to no more than one per minute per
 * neighbour; neigh->updated is used as the timestamp of the last probe.
 * The timestamp check and its update are done under the *write* side of
 * neigh->lock: with only the read side held, several concurrent lookups
 * could all pass the check and each send a probe, and the store to
 * neigh->updated would race with other users of the lock.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked test first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + 60 * HZ)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation without holding the neighbour lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
256
1da177e4 257/*
554cfb7e 258 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 259 */
554cfb7e
YH
260static int inline rt6_check_dev(struct rt6_info *rt, int oif)
261{
262 struct net_device *dev = rt->rt6i_dev;
263 if (!oif || dev->ifindex == oif)
264 return 2;
265 if ((dev->flags & IFF_LOOPBACK) &&
266 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
267 return 1;
268 return 0;
269}
1da177e4 270
554cfb7e 271static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 272{
554cfb7e
YH
273 struct neighbour *neigh = rt->rt6i_nexthop;
274 int m = 0;
275 if (neigh) {
276 read_lock_bh(&neigh->lock);
277 if (neigh->nud_state & NUD_VALID)
278 m = 1;
279 read_unlock_bh(&neigh->lock);
1da177e4 280 }
554cfb7e 281 return m;
1da177e4
LT
282}
283
554cfb7e
YH
284static int rt6_score_route(struct rt6_info *rt, int oif,
285 int strict)
1da177e4 286{
554cfb7e
YH
287 int m = rt6_check_dev(rt, oif);
288 if (!m && (strict & RT6_SELECT_F_IFACE))
289 return -1;
ebacaaa0
YH
290#ifdef CONFIG_IPV6_ROUTER_PREF
291 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
292#endif
554cfb7e 293 if (rt6_check_neigh(rt))
ebacaaa0 294 m |= 16;
554cfb7e
YH
295 else if (strict & RT6_SELECT_F_REACHABLE)
296 return -1;
297 return m;
298}
299
300static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
301 int strict)
302{
303 struct rt6_info *match = NULL, *last = NULL;
304 struct rt6_info *rt, *rt0 = *head;
305 u32 metric;
306 int mpri = -1;
1da177e4 307
554cfb7e
YH
308 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
309 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 310
554cfb7e
YH
311 for (rt = rt0, metric = rt0->rt6i_metric;
312 rt && rt->rt6i_metric == metric;
313 rt = rt->u.next) {
314 int m;
1da177e4 315
554cfb7e 316 if (rt6_check_expired(rt))
1da177e4
LT
317 continue;
318
554cfb7e
YH
319 last = rt;
320
321 m = rt6_score_route(rt, oif, strict);
322 if (m < 0)
1da177e4 323 continue;
1da177e4 324
554cfb7e 325 if (m > mpri) {
27097255 326 rt6_probe(match);
554cfb7e 327 match = rt;
1da177e4 328 mpri = m;
27097255
YH
329 } else {
330 rt6_probe(rt);
1da177e4
LT
331 }
332 }
333
554cfb7e
YH
334 if (!match &&
335 (strict & RT6_SELECT_F_REACHABLE) &&
336 last && last != rt0) {
337 /* no entries matched; do round-robin */
338 *head = rt0->u.next;
339 rt0->u.next = last->u.next;
340 last->u.next = rt0;
1da177e4 341 }
1da177e4 342
554cfb7e
YH
343 RT6_TRACE("%s() => %p, score=%d\n",
344 __FUNCTION__, match, mpri);
1da177e4 345
554cfb7e 346 return (match ? match : &ip6_null_entry);
1da177e4
LT
347}
348
349struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
350 int oif, int strict)
351{
352 struct fib6_node *fn;
353 struct rt6_info *rt;
354
355 read_lock_bh(&rt6_lock);
356 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
357 rt = rt6_device_match(fn->leaf, oif, strict);
358 dst_hold(&rt->u.dst);
359 rt->u.dst.__use++;
360 read_unlock_bh(&rt6_lock);
361
362 rt->u.dst.lastuse = jiffies;
363 if (rt->u.dst.error == 0)
364 return rt;
365 dst_release(&rt->u.dst);
366 return NULL;
367}
368
369/* ip6_ins_rt is called with FREE rt6_lock.
370 It takes new route entry, the addition fails by any reason the
371 route is freed. In any case, if caller does not hold it, it may
372 be destroyed.
373 */
374
0d51aa80
JHS
375int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
376 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
377{
378 int err;
379
380 write_lock_bh(&rt6_lock);
0d51aa80 381 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
1da177e4
LT
382 write_unlock_bh(&rt6_lock);
383
384 return err;
385}
386
95a9a5ba
YH
387static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
388 struct in6_addr *saddr)
1da177e4 389{
1da177e4
LT
390 struct rt6_info *rt;
391
392 /*
393 * Clone the route.
394 */
395
396 rt = ip6_rt_copy(ort);
397
398 if (rt) {
58c4fb86
YH
399 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
400 if (rt->rt6i_dst.plen != 128 &&
401 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
402 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 403 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 404 }
1da177e4 405
58c4fb86 406 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
407 rt->rt6i_dst.plen = 128;
408 rt->rt6i_flags |= RTF_CACHE;
409 rt->u.dst.flags |= DST_HOST;
410
411#ifdef CONFIG_IPV6_SUBTREES
412 if (rt->rt6i_src.plen && saddr) {
413 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
414 rt->rt6i_src.plen = 128;
415 }
416#endif
417
418 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
419
95a9a5ba 420 }
1da177e4 421
95a9a5ba
YH
422 return rt;
423}
1da177e4 424
299d9939
YH
425static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
426{
427 struct rt6_info *rt = ip6_rt_copy(ort);
428 if (rt) {
429 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
430 rt->rt6i_dst.plen = 128;
431 rt->rt6i_flags |= RTF_CACHE;
432 if (rt->rt6i_flags & RTF_REJECT)
433 rt->u.dst.error = ort->u.dst.error;
434 rt->u.dst.flags |= DST_HOST;
435 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
436 }
437 return rt;
438}
439
/*
 * Used inside the lookup functions after rt6_select(): when the current
 * node yielded only the null entry, climb towards the root. Reaching a
 * root node jumps to the caller's "out" label; reaching a node that
 * carries route info jumps to the caller's "restart" label. Relies on
 * the caller's local variables "rt" and "fn".
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry) { \
	while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) \
			goto out; \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
450
451
452void ip6_route_input(struct sk_buff *skb)
453{
454 struct fib6_node *fn;
519fbd87 455 struct rt6_info *rt, *nrt;
1da177e4
LT
456 int strict;
457 int attempts = 3;
519fbd87 458 int err;
8238dd06 459 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 460
118f8c16 461 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
462
463relookup:
464 read_lock_bh(&rt6_lock);
465
8238dd06 466restart_2:
1da177e4
LT
467 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
468 &skb->nh.ipv6h->saddr);
469
470restart:
8238dd06 471 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
1da177e4 472 BACKTRACK();
8238dd06
YH
473 if (rt == &ip6_null_entry ||
474 rt->rt6i_flags & RTF_CACHE)
1ddef044 475 goto out;
1da177e4 476
fb9de91e
YH
477 dst_hold(&rt->u.dst);
478 read_unlock_bh(&rt6_lock);
479
519fbd87
YH
480 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
481 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
482 else {
483#if CLONE_OFFLINK_ROUTE
484 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
485#else
486 goto out2;
487#endif
488 }
e40cf353 489
519fbd87
YH
490 dst_release(&rt->u.dst);
491 rt = nrt ? : &ip6_null_entry;
1da177e4 492
519fbd87
YH
493 dst_hold(&rt->u.dst);
494 if (nrt) {
495 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
496 if (!err)
1da177e4 497 goto out2;
1da177e4 498 }
1da177e4 499
519fbd87
YH
500 if (--attempts <= 0)
501 goto out2;
502
503 /*
504 * Race condition! In the gap, when rt6_lock was
505 * released someone could insert this route. Relookup.
506 */
507 dst_release(&rt->u.dst);
508 goto relookup;
509
510out:
8238dd06
YH
511 if (reachable) {
512 reachable = 0;
513 goto restart_2;
514 }
519fbd87
YH
515 dst_hold(&rt->u.dst);
516 read_unlock_bh(&rt6_lock);
1da177e4
LT
517out2:
518 rt->u.dst.lastuse = jiffies;
519 rt->u.dst.__use++;
520 skb->dst = (struct dst_entry *) rt;
fb9de91e 521 return;
1da177e4
LT
522}
523
524struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
525{
526 struct fib6_node *fn;
519fbd87 527 struct rt6_info *rt, *nrt;
1da177e4
LT
528 int strict;
529 int attempts = 3;
519fbd87 530 int err;
8238dd06 531 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 532
554cfb7e 533 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
534
535relookup:
536 read_lock_bh(&rt6_lock);
537
8238dd06 538restart_2:
1da177e4
LT
539 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
540
541restart:
8238dd06 542 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
1ddef044 543 BACKTRACK();
8238dd06
YH
544 if (rt == &ip6_null_entry ||
545 rt->rt6i_flags & RTF_CACHE)
1da177e4 546 goto out;
1da177e4 547
fb9de91e
YH
548 dst_hold(&rt->u.dst);
549 read_unlock_bh(&rt6_lock);
550
519fbd87 551 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 552 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
553 else {
554#if CLONE_OFFLINK_ROUTE
555 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
556#else
557 goto out2;
558#endif
559 }
1da177e4 560
519fbd87
YH
561 dst_release(&rt->u.dst);
562 rt = nrt ? : &ip6_null_entry;
1da177e4 563
519fbd87
YH
564 dst_hold(&rt->u.dst);
565 if (nrt) {
566 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
567 if (!err)
1da177e4 568 goto out2;
1da177e4 569 }
e40cf353 570
519fbd87
YH
571 if (--attempts <= 0)
572 goto out2;
573
574 /*
575 * Race condition! In the gap, when rt6_lock was
576 * released someone could insert this route. Relookup.
577 */
578 dst_release(&rt->u.dst);
579 goto relookup;
580
581out:
8238dd06
YH
582 if (reachable) {
583 reachable = 0;
584 goto restart_2;
585 }
519fbd87
YH
586 dst_hold(&rt->u.dst);
587 read_unlock_bh(&rt6_lock);
1da177e4
LT
588out2:
589 rt->u.dst.lastuse = jiffies;
590 rt->u.dst.__use++;
591 return &rt->u.dst;
592}
593
594
595/*
596 * Destination cache support functions
597 */
598
599static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
600{
601 struct rt6_info *rt;
602
603 rt = (struct rt6_info *) dst;
604
605 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
606 return dst;
607
608 return NULL;
609}
610
611static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
612{
613 struct rt6_info *rt = (struct rt6_info *) dst;
614
615 if (rt) {
616 if (rt->rt6i_flags & RTF_CACHE)
0d51aa80 617 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
618 else
619 dst_release(dst);
620 }
621 return NULL;
622}
623
624static void ip6_link_failure(struct sk_buff *skb)
625{
626 struct rt6_info *rt;
627
628 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
629
630 rt = (struct rt6_info *) skb->dst;
631 if (rt) {
632 if (rt->rt6i_flags&RTF_CACHE) {
633 dst_set_expires(&rt->u.dst, 0);
634 rt->rt6i_flags |= RTF_EXPIRES;
635 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
636 rt->rt6i_node->fn_sernum = -1;
637 }
638}
639
640static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
641{
642 struct rt6_info *rt6 = (struct rt6_info*)dst;
643
644 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
645 rt6->rt6i_flags |= RTF_MODIFIED;
646 if (mtu < IPV6_MIN_MTU) {
647 mtu = IPV6_MIN_MTU;
648 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
649 }
650 dst->metrics[RTAX_MTU-1] = mtu;
651 }
652}
653
/*
 * List of dst entries handed out by ndisc_dst_alloc(), linked through
 * dst->next and reaped by ndisc_dst_gc(). Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
657
658static inline unsigned int ipv6_advmss(unsigned int mtu)
659{
660 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
661
662 if (mtu < ip6_rt_min_advmss)
663 mtu = ip6_rt_min_advmss;
664
665 /*
666 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
667 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
668 * IPV6_MAXPLEN is also valid and means: "any MSS,
669 * rely only on pmtu discovery"
670 */
671 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
672 mtu = IPV6_MAXPLEN;
673 return mtu;
674}
675
676struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
677 struct neighbour *neigh,
678 struct in6_addr *addr,
679 int (*output)(struct sk_buff *))
680{
681 struct rt6_info *rt;
682 struct inet6_dev *idev = in6_dev_get(dev);
683
684 if (unlikely(idev == NULL))
685 return NULL;
686
687 rt = ip6_dst_alloc();
688 if (unlikely(rt == NULL)) {
689 in6_dev_put(idev);
690 goto out;
691 }
692
693 dev_hold(dev);
694 if (neigh)
695 neigh_hold(neigh);
696 else
697 neigh = ndisc_get_neigh(dev, addr);
698
699 rt->rt6i_dev = dev;
700 rt->rt6i_idev = idev;
701 rt->rt6i_nexthop = neigh;
702 atomic_set(&rt->u.dst.__refcnt, 1);
703 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
704 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
705 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
706 rt->u.dst.output = output;
707
708#if 0 /* there's no chance to use these for ndisc */
709 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
710 ? DST_HOST
711 : 0;
712 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
713 rt->rt6i_dst.plen = 128;
714#endif
715
716 write_lock_bh(&rt6_lock);
717 rt->u.dst.next = ndisc_dst_gc_list;
718 ndisc_dst_gc_list = &rt->u.dst;
719 write_unlock_bh(&rt6_lock);
720
721 fib6_force_start_gc();
722
723out:
724 return (struct dst_entry *)rt;
725}
726
727int ndisc_dst_gc(int *more)
728{
729 struct dst_entry *dst, *next, **pprev;
730 int freed;
731
732 next = NULL;
733 pprev = &ndisc_dst_gc_list;
734 freed = 0;
735 while ((dst = *pprev) != NULL) {
736 if (!atomic_read(&dst->__refcnt)) {
737 *pprev = dst->next;
738 dst_free(dst);
739 freed++;
740 } else {
741 pprev = &dst->next;
742 (*more)++;
743 }
744 }
745
746 return freed;
747}
748
749static int ip6_dst_gc(void)
750{
751 static unsigned expire = 30*HZ;
752 static unsigned long last_gc;
753 unsigned long now = jiffies;
754
755 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
756 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
757 goto out;
758
759 expire++;
760 fib6_run_gc(expire);
761 last_gc = now;
762 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
763 expire = ip6_rt_gc_timeout>>1;
764
765out:
766 expire -= expire>>ip6_rt_gc_elasticity;
767 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
768}
769
770/* Clean host part of a prefix. Not necessary in radix tree,
771 but results in cleaner routing tables.
772
773 Remove it only when all the things will work!
774 */
775
776static int ipv6_get_mtu(struct net_device *dev)
777{
778 int mtu = IPV6_MIN_MTU;
779 struct inet6_dev *idev;
780
781 idev = in6_dev_get(dev);
782 if (idev) {
783 mtu = idev->cnf.mtu6;
784 in6_dev_put(idev);
785 }
786 return mtu;
787}
788
789int ipv6_get_hoplimit(struct net_device *dev)
790{
791 int hoplimit = ipv6_devconf.hop_limit;
792 struct inet6_dev *idev;
793
794 idev = in6_dev_get(dev);
795 if (idev) {
796 hoplimit = idev->cnf.hop_limit;
797 in6_dev_put(idev);
798 }
799 return hoplimit;
800}
801
802/*
803 *
804 */
805
0d51aa80
JHS
806int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
807 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
808{
809 int err;
810 struct rtmsg *r;
811 struct rtattr **rta;
812 struct rt6_info *rt = NULL;
813 struct net_device *dev = NULL;
814 struct inet6_dev *idev = NULL;
815 int addr_type;
816
817 rta = (struct rtattr **) _rtattr;
818
819 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
820 return -EINVAL;
821#ifndef CONFIG_IPV6_SUBTREES
822 if (rtmsg->rtmsg_src_len)
823 return -EINVAL;
824#endif
825 if (rtmsg->rtmsg_ifindex) {
826 err = -ENODEV;
827 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
828 if (!dev)
829 goto out;
830 idev = in6_dev_get(dev);
831 if (!idev)
832 goto out;
833 }
834
835 if (rtmsg->rtmsg_metric == 0)
836 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
837
838 rt = ip6_dst_alloc();
839
840 if (rt == NULL) {
841 err = -ENOMEM;
842 goto out;
843 }
844
845 rt->u.dst.obsolete = -1;
3dd4bc68 846 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1da177e4
LT
847 if (nlh && (r = NLMSG_DATA(nlh))) {
848 rt->rt6i_protocol = r->rtm_protocol;
849 } else {
850 rt->rt6i_protocol = RTPROT_BOOT;
851 }
852
853 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
854
855 if (addr_type & IPV6_ADDR_MULTICAST)
856 rt->u.dst.input = ip6_mc_input;
857 else
858 rt->u.dst.input = ip6_forward;
859
860 rt->u.dst.output = ip6_output;
861
862 ipv6_addr_prefix(&rt->rt6i_dst.addr,
863 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
864 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
865 if (rt->rt6i_dst.plen == 128)
866 rt->u.dst.flags = DST_HOST;
867
868#ifdef CONFIG_IPV6_SUBTREES
869 ipv6_addr_prefix(&rt->rt6i_src.addr,
870 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
871 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
872#endif
873
874 rt->rt6i_metric = rtmsg->rtmsg_metric;
875
876 /* We cannot add true routes via loopback here,
877 they would result in kernel looping; promote them to reject routes
878 */
879 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
880 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
881 /* hold loopback dev/idev if we haven't done so. */
882 if (dev != &loopback_dev) {
883 if (dev) {
884 dev_put(dev);
885 in6_dev_put(idev);
886 }
887 dev = &loopback_dev;
888 dev_hold(dev);
889 idev = in6_dev_get(dev);
890 if (!idev) {
891 err = -ENODEV;
892 goto out;
893 }
894 }
895 rt->u.dst.output = ip6_pkt_discard_out;
896 rt->u.dst.input = ip6_pkt_discard;
897 rt->u.dst.error = -ENETUNREACH;
898 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
899 goto install_route;
900 }
901
902 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
903 struct in6_addr *gw_addr;
904 int gwa_type;
905
906 gw_addr = &rtmsg->rtmsg_gateway;
907 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
908 gwa_type = ipv6_addr_type(gw_addr);
909
910 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
911 struct rt6_info *grt;
912
913 /* IPv6 strictly inhibits using not link-local
914 addresses as nexthop address.
915 Otherwise, router will not able to send redirects.
916 It is very good, but in some (rare!) circumstances
917 (SIT, PtP, NBMA NOARP links) it is handy to allow
918 some exceptions. --ANK
919 */
920 err = -EINVAL;
921 if (!(gwa_type&IPV6_ADDR_UNICAST))
922 goto out;
923
924 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
925
926 err = -EHOSTUNREACH;
927 if (grt == NULL)
928 goto out;
929 if (dev) {
930 if (dev != grt->rt6i_dev) {
931 dst_release(&grt->u.dst);
932 goto out;
933 }
934 } else {
935 dev = grt->rt6i_dev;
936 idev = grt->rt6i_idev;
937 dev_hold(dev);
938 in6_dev_hold(grt->rt6i_idev);
939 }
940 if (!(grt->rt6i_flags&RTF_GATEWAY))
941 err = 0;
942 dst_release(&grt->u.dst);
943
944 if (err)
945 goto out;
946 }
947 err = -EINVAL;
948 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
949 goto out;
950 }
951
952 err = -ENODEV;
953 if (dev == NULL)
954 goto out;
955
956 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
957 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
958 if (IS_ERR(rt->rt6i_nexthop)) {
959 err = PTR_ERR(rt->rt6i_nexthop);
960 rt->rt6i_nexthop = NULL;
961 goto out;
962 }
963 }
964
965 rt->rt6i_flags = rtmsg->rtmsg_flags;
966
967install_route:
968 if (rta && rta[RTA_METRICS-1]) {
969 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
970 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
971
972 while (RTA_OK(attr, attrlen)) {
973 unsigned flavor = attr->rta_type;
974 if (flavor) {
975 if (flavor > RTAX_MAX) {
976 err = -EINVAL;
977 goto out;
978 }
979 rt->u.dst.metrics[flavor-1] =
980 *(u32 *)RTA_DATA(attr);
981 }
982 attr = RTA_NEXT(attr, attrlen);
983 }
984 }
985
986 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
987 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
988 if (!rt->u.dst.metrics[RTAX_MTU-1])
989 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
990 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
991 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
992 rt->u.dst.dev = dev;
993 rt->rt6i_idev = idev;
0d51aa80 994 return ip6_ins_rt(rt, nlh, _rtattr, req);
1da177e4
LT
995
996out:
997 if (dev)
998 dev_put(dev);
999 if (idev)
1000 in6_dev_put(idev);
1001 if (rt)
1002 dst_free((struct dst_entry *) rt);
1003 return err;
1004}
1005
0d51aa80 1006int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1007{
1008 int err;
1009
1010 write_lock_bh(&rt6_lock);
1011
0d51aa80 1012 err = fib6_del(rt, nlh, _rtattr, req);
1da177e4
LT
1013 dst_release(&rt->u.dst);
1014
1015 write_unlock_bh(&rt6_lock);
1016
1017 return err;
1018}
1019
0d51aa80 1020static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1021{
1022 struct fib6_node *fn;
1023 struct rt6_info *rt;
1024 int err = -ESRCH;
1025
1026 read_lock_bh(&rt6_lock);
1027
1028 fn = fib6_locate(&ip6_routing_table,
1029 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1030 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1031
1032 if (fn) {
1033 for (rt = fn->leaf; rt; rt = rt->u.next) {
1034 if (rtmsg->rtmsg_ifindex &&
1035 (rt->rt6i_dev == NULL ||
1036 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1037 continue;
1038 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1039 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1040 continue;
1041 if (rtmsg->rtmsg_metric &&
1042 rtmsg->rtmsg_metric != rt->rt6i_metric)
1043 continue;
1044 dst_hold(&rt->u.dst);
1045 read_unlock_bh(&rt6_lock);
1046
0d51aa80 1047 return ip6_del_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1048 }
1049 }
1050 read_unlock_bh(&rt6_lock);
1051
1052 return err;
1053}
1054
1055/*
1056 * Handle redirects
1057 */
1058void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1059 struct neighbour *neigh, u8 *lladdr, int on_link)
1060{
1061 struct rt6_info *rt, *nrt;
1062
1063 /* Locate old route to this destination. */
1064 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1065
1066 if (rt == NULL)
1067 return;
1068
1069 if (neigh->dev != rt->rt6i_dev)
1070 goto out;
1071
1072 /*
1073 * Current route is on-link; redirect is always invalid.
1074 *
1075 * Seems, previous statement is not true. It could
1076 * be node, which looks for us as on-link (f.e. proxy ndisc)
1077 * But then router serving it might decide, that we should
1078 * know truth 8)8) --ANK (980726).
1079 */
1080 if (!(rt->rt6i_flags&RTF_GATEWAY))
1081 goto out;
1082
1083 /*
1084 * RFC 2461 specifies that redirects should only be
1085 * accepted if they come from the nexthop to the target.
1086 * Due to the way default routers are chosen, this notion
1087 * is a bit fuzzy and one might need to check all default
1088 * routers.
1089 */
1090 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1091 if (rt->rt6i_flags & RTF_DEFAULT) {
1092 struct rt6_info *rt1;
1093
1094 read_lock(&rt6_lock);
1095 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1096 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1097 dst_hold(&rt1->u.dst);
1098 dst_release(&rt->u.dst);
1099 read_unlock(&rt6_lock);
1100 rt = rt1;
1101 goto source_ok;
1102 }
1103 }
1104 read_unlock(&rt6_lock);
1105 }
1106 if (net_ratelimit())
1107 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1108 "for redirect target\n");
1109 goto out;
1110 }
1111
1112source_ok:
1113
1114 /*
1115 * We have finally decided to accept it.
1116 */
1117
1118 neigh_update(neigh, lladdr, NUD_STALE,
1119 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1120 NEIGH_UPDATE_F_OVERRIDE|
1121 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1122 NEIGH_UPDATE_F_ISROUTER))
1123 );
1124
1125 /*
1126 * Redirect received -> path was valid.
1127 * Look, redirects are sent only in response to data packets,
1128 * so that this nexthop apparently is reachable. --ANK
1129 */
1130 dst_confirm(&rt->u.dst);
1131
1132 /* Duplicate redirect: silently ignore. */
1133 if (neigh == rt->u.dst.neighbour)
1134 goto out;
1135
1136 nrt = ip6_rt_copy(rt);
1137 if (nrt == NULL)
1138 goto out;
1139
1140 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1141 if (on_link)
1142 nrt->rt6i_flags &= ~RTF_GATEWAY;
1143
1144 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1145 nrt->rt6i_dst.plen = 128;
1146 nrt->u.dst.flags |= DST_HOST;
1147
1148 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1149 nrt->rt6i_nexthop = neigh_clone(neigh);
1150 /* Reset pmtu, it may be better */
1151 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1152 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1153
0d51aa80 1154 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1da177e4
LT
1155 goto out;
1156
1157 if (rt->rt6i_flags&RTF_CACHE) {
0d51aa80 1158 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1159 return;
1160 }
1161
1162out:
1163 dst_release(&rt->u.dst);
1164 return;
1165}
1166
1167/*
1168 * Handle ICMP "packet too big" messages
1169 * i.e. Path MTU discovery
1170 */
1171
1172void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1173 struct net_device *dev, u32 pmtu)
1174{
1175 struct rt6_info *rt, *nrt;
1176 int allfrag = 0;
1177
1178 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1179 if (rt == NULL)
1180 return;
1181
1182 if (pmtu >= dst_mtu(&rt->u.dst))
1183 goto out;
1184
1185 if (pmtu < IPV6_MIN_MTU) {
1186 /*
1187 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1188 * MTU (1280) and a fragment header should always be included
1189 * after a node receiving Too Big message reporting PMTU is
1190 * less than the IPv6 Minimum Link MTU.
1191 */
1192 pmtu = IPV6_MIN_MTU;
1193 allfrag = 1;
1194 }
1195
1196 /* New mtu received -> path was valid.
1197 They are sent only in response to data packets,
1198 so that this nexthop apparently is reachable. --ANK
1199 */
1200 dst_confirm(&rt->u.dst);
1201
1202 /* Host route. If it is static, it would be better
1203 not to override it, but add new one, so that
1204 when cache entry will expire old pmtu
1205 would return automatically.
1206 */
1207 if (rt->rt6i_flags & RTF_CACHE) {
1208 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1209 if (allfrag)
1210 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1211 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1212 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1213 goto out;
1214 }
1215
1216 /* Network route.
1217 Two cases are possible:
1218 1. It is connected route. Action: COW
1219 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1220 */
d5315b50 1221 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1222 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1223 else
1224 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1225
d5315b50 1226 if (nrt) {
a1e78363
YH
1227 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1228 if (allfrag)
1229 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1230
1231 /* According to RFC 1981, detecting PMTU increase shouldn't be
1232 * happened within 5 mins, the recommended timer is 10 mins.
1233 * Here this route expiration time is set to ip6_rt_mtu_expires
1234 * which is 10 mins. After 10 mins the decreased pmtu is expired
1235 * and detecting PMTU increase will be automatically happened.
1236 */
1237 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1238 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1239
1240 ip6_ins_rt(nrt, NULL, NULL, NULL);
1da177e4 1241 }
1da177e4
LT
1242out:
1243 dst_release(&rt->u.dst);
1244}
1245
1246/*
1247 * Misc support functions
1248 */
1249
1250static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1251{
1252 struct rt6_info *rt = ip6_dst_alloc();
1253
1254 if (rt) {
1255 rt->u.dst.input = ort->u.dst.input;
1256 rt->u.dst.output = ort->u.dst.output;
1257
1258 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1259 rt->u.dst.dev = ort->u.dst.dev;
1260 if (rt->u.dst.dev)
1261 dev_hold(rt->u.dst.dev);
1262 rt->rt6i_idev = ort->rt6i_idev;
1263 if (rt->rt6i_idev)
1264 in6_dev_hold(rt->rt6i_idev);
1265 rt->u.dst.lastuse = jiffies;
1266 rt->rt6i_expires = 0;
1267
1268 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1269 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1270 rt->rt6i_metric = 0;
1271
1272 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1273#ifdef CONFIG_IPV6_SUBTREES
1274 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1275#endif
1276 }
1277 return rt;
1278}
1279
1280struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1281{
1282 struct rt6_info *rt;
1283 struct fib6_node *fn;
1284
1285 fn = &ip6_routing_table;
1286
1287 write_lock_bh(&rt6_lock);
1288 for (rt = fn->leaf; rt; rt=rt->u.next) {
1289 if (dev == rt->rt6i_dev &&
045927ff 1290 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1291 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1292 break;
1293 }
1294 if (rt)
1295 dst_hold(&rt->u.dst);
1296 write_unlock_bh(&rt6_lock);
1297 return rt;
1298}
1299
1300struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1301 struct net_device *dev,
1302 unsigned int pref)
1da177e4
LT
1303{
1304 struct in6_rtmsg rtmsg;
1305
1306 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1307 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1308 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1309 rtmsg.rtmsg_metric = 1024;
ebacaaa0
YH
1310 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1311 RTF_PREF(pref);
1da177e4
LT
1312
1313 rtmsg.rtmsg_ifindex = dev->ifindex;
1314
0d51aa80 1315 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1316 return rt6_get_dflt_router(gwaddr, dev);
1317}
1318
1319void rt6_purge_dflt_routers(void)
1320{
1321 struct rt6_info *rt;
1322
1323restart:
1324 read_lock_bh(&rt6_lock);
1325 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1326 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1327 dst_hold(&rt->u.dst);
1328
1da177e4
LT
1329 read_unlock_bh(&rt6_lock);
1330
0d51aa80 1331 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1332
1333 goto restart;
1334 }
1335 }
1336 read_unlock_bh(&rt6_lock);
1337}
1338
1339int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1340{
1341 struct in6_rtmsg rtmsg;
1342 int err;
1343
1344 switch(cmd) {
1345 case SIOCADDRT: /* Add a route */
1346 case SIOCDELRT: /* Delete a route */
1347 if (!capable(CAP_NET_ADMIN))
1348 return -EPERM;
1349 err = copy_from_user(&rtmsg, arg,
1350 sizeof(struct in6_rtmsg));
1351 if (err)
1352 return -EFAULT;
1353
1354 rtnl_lock();
1355 switch (cmd) {
1356 case SIOCADDRT:
0d51aa80 1357 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1358 break;
1359 case SIOCDELRT:
0d51aa80 1360 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1361 break;
1362 default:
1363 err = -EINVAL;
1364 }
1365 rtnl_unlock();
1366
1367 return err;
1368 };
1369
1370 return -EINVAL;
1371}
1372
1373/*
1374 * Drop the packet on the floor
1375 */
1376
20380731 1377static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4
LT
1378{
1379 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1380 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1381 kfree_skb(skb);
1382 return 0;
1383}
1384
20380731 1385static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1386{
1387 skb->dev = skb->dst->dev;
1388 return ip6_pkt_discard(skb);
1389}
1390
1391/*
1392 * Allocate a dst for local (unicast / anycast) address.
1393 */
1394
1395struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1396 const struct in6_addr *addr,
1397 int anycast)
1398{
1399 struct rt6_info *rt = ip6_dst_alloc();
1400
1401 if (rt == NULL)
1402 return ERR_PTR(-ENOMEM);
1403
1404 dev_hold(&loopback_dev);
1405 in6_dev_hold(idev);
1406
1407 rt->u.dst.flags = DST_HOST;
1408 rt->u.dst.input = ip6_input;
1409 rt->u.dst.output = ip6_output;
1410 rt->rt6i_dev = &loopback_dev;
1411 rt->rt6i_idev = idev;
1412 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1413 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1414 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1415 rt->u.dst.obsolete = -1;
1416
1417 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1418 if (anycast)
1419 rt->rt6i_flags |= RTF_ANYCAST;
1420 else
1da177e4
LT
1421 rt->rt6i_flags |= RTF_LOCAL;
1422 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1423 if (rt->rt6i_nexthop == NULL) {
1424 dst_free((struct dst_entry *) rt);
1425 return ERR_PTR(-ENOMEM);
1426 }
1427
1428 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1429 rt->rt6i_dst.plen = 128;
1430
1431 atomic_set(&rt->u.dst.__refcnt, 1);
1432
1433 return rt;
1434}
1435
1436static int fib6_ifdown(struct rt6_info *rt, void *arg)
1437{
1438 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1439 rt != &ip6_null_entry) {
1440 RT6_TRACE("deleted by ifdown %p\n", rt);
1441 return -1;
1442 }
1443 return 0;
1444}
1445
1446void rt6_ifdown(struct net_device *dev)
1447{
1448 write_lock_bh(&rt6_lock);
1449 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1450 write_unlock_bh(&rt6_lock);
1451}
1452
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1458
1459static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1460{
1461 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1462 struct inet6_dev *idev;
1463
1464 /* In IPv6 pmtu discovery is not optional,
1465 so that RTAX_MTU lock cannot disable it.
1466 We still use this lock to block changes
1467 caused by addrconf/ndisc.
1468 */
1469
1470 idev = __in6_dev_get(arg->dev);
1471 if (idev == NULL)
1472 return 0;
1473
1474 /* For administrative MTU increase, there is no way to discover
1475 IPv6 PMTU increase, so PMTU increase should be updated here.
1476 Since RFC 1981 doesn't include administrative MTU increase
1477 update PMTU increase is a MUST. (i.e. jumbo frame)
1478 */
1479 /*
1480 If new MTU is less than route PMTU, this new MTU will be the
1481 lowest MTU in the path, update the route PMTU to reflect PMTU
1482 decreases; if new MTU is greater than route PMTU, and the
1483 old MTU is the lowest MTU in the path, update the route PMTU
1484 to reflect the increase. In this case if the other nodes' MTU
1485 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1486 PMTU discouvery.
1487 */
1488 if (rt->rt6i_dev == arg->dev &&
1489 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1490 (dst_mtu(&rt->u.dst) > arg->mtu ||
1491 (dst_mtu(&rt->u.dst) < arg->mtu &&
1492 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1493 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1494 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1495 return 0;
1496}
1497
1498void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1499{
1500 struct rt6_mtu_change_arg arg;
1501
1502 arg.dev = dev;
1503 arg.mtu = mtu;
1504 read_lock_bh(&rt6_lock);
1505 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1506 read_unlock_bh(&rt6_lock);
1507}
1508
1509static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1510 struct in6_rtmsg *rtmsg)
1511{
1512 memset(rtmsg, 0, sizeof(*rtmsg));
1513
1514 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1515 rtmsg->rtmsg_src_len = r->rtm_src_len;
1516 rtmsg->rtmsg_flags = RTF_UP;
1517 if (r->rtm_type == RTN_UNREACHABLE)
1518 rtmsg->rtmsg_flags |= RTF_REJECT;
1519
1520 if (rta[RTA_GATEWAY-1]) {
1521 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1522 return -EINVAL;
1523 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1524 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1525 }
1526 if (rta[RTA_DST-1]) {
1527 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1528 return -EINVAL;
1529 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1530 }
1531 if (rta[RTA_SRC-1]) {
1532 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1533 return -EINVAL;
1534 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1535 }
1536 if (rta[RTA_OIF-1]) {
1537 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1538 return -EINVAL;
1539 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1540 }
1541 if (rta[RTA_PRIORITY-1]) {
1542 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1543 return -EINVAL;
1544 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1545 }
1546 return 0;
1547}
1548
1549int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1550{
1551 struct rtmsg *r = NLMSG_DATA(nlh);
1552 struct in6_rtmsg rtmsg;
1553
1554 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1555 return -EINVAL;
0d51aa80 1556 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1557}
1558
1559int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1560{
1561 struct rtmsg *r = NLMSG_DATA(nlh);
1562 struct in6_rtmsg rtmsg;
1563
1564 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1565 return -EINVAL;
0d51aa80 1566 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1567}
1568
/* Per-call context handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump request being served */
};
1574
1575static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1576 struct in6_addr *dst, struct in6_addr *src,
1577 int iif, int type, u32 pid, u32 seq,
1578 int prefix, unsigned int flags)
1da177e4
LT
1579{
1580 struct rtmsg *rtm;
1581 struct nlmsghdr *nlh;
1582 unsigned char *b = skb->tail;
1583 struct rta_cacheinfo ci;
1584
1585 if (prefix) { /* user wants prefix routes only */
1586 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1587 /* success since this is not a prefix route */
1588 return 1;
1589 }
1590 }
1591
b6544c0b 1592 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1593 rtm = NLMSG_DATA(nlh);
1594 rtm->rtm_family = AF_INET6;
1595 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1596 rtm->rtm_src_len = rt->rt6i_src.plen;
1597 rtm->rtm_tos = 0;
1598 rtm->rtm_table = RT_TABLE_MAIN;
1599 if (rt->rt6i_flags&RTF_REJECT)
1600 rtm->rtm_type = RTN_UNREACHABLE;
1601 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1602 rtm->rtm_type = RTN_LOCAL;
1603 else
1604 rtm->rtm_type = RTN_UNICAST;
1605 rtm->rtm_flags = 0;
1606 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1607 rtm->rtm_protocol = rt->rt6i_protocol;
1608 if (rt->rt6i_flags&RTF_DYNAMIC)
1609 rtm->rtm_protocol = RTPROT_REDIRECT;
1610 else if (rt->rt6i_flags & RTF_ADDRCONF)
1611 rtm->rtm_protocol = RTPROT_KERNEL;
1612 else if (rt->rt6i_flags&RTF_DEFAULT)
1613 rtm->rtm_protocol = RTPROT_RA;
1614
1615 if (rt->rt6i_flags&RTF_CACHE)
1616 rtm->rtm_flags |= RTM_F_CLONED;
1617
1618 if (dst) {
1619 RTA_PUT(skb, RTA_DST, 16, dst);
1620 rtm->rtm_dst_len = 128;
1621 } else if (rtm->rtm_dst_len)
1622 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1623#ifdef CONFIG_IPV6_SUBTREES
1624 if (src) {
1625 RTA_PUT(skb, RTA_SRC, 16, src);
1626 rtm->rtm_src_len = 128;
1627 } else if (rtm->rtm_src_len)
1628 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1629#endif
1630 if (iif)
1631 RTA_PUT(skb, RTA_IIF, 4, &iif);
1632 else if (dst) {
1633 struct in6_addr saddr_buf;
1634 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1635 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1636 }
1637 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1638 goto rtattr_failure;
1639 if (rt->u.dst.neighbour)
1640 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1641 if (rt->u.dst.dev)
1642 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1643 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1644 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1645 if (rt->rt6i_expires)
1646 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1647 else
1648 ci.rta_expires = 0;
1649 ci.rta_used = rt->u.dst.__use;
1650 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1651 ci.rta_error = rt->u.dst.error;
1652 ci.rta_id = 0;
1653 ci.rta_ts = 0;
1654 ci.rta_tsage = 0;
1655 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1656 nlh->nlmsg_len = skb->tail - b;
1657 return skb->len;
1658
1659nlmsg_failure:
1660rtattr_failure:
1661 skb_trim(skb, b - skb->data);
1662 return -1;
1663}
1664
1665static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1666{
1667 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1668 int prefix;
1669
1670 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1671 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1672 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1673 } else
1674 prefix = 0;
1675
1676 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1677 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 1678 prefix, NLM_F_MULTI);
1da177e4
LT
1679}
1680
1681static int fib6_dump_node(struct fib6_walker_t *w)
1682{
1683 int res;
1684 struct rt6_info *rt;
1685
1686 for (rt = w->leaf; rt; rt = rt->u.next) {
1687 res = rt6_dump_route(rt, w->args);
1688 if (res < 0) {
1689 /* Frame is full, suspend walking */
1690 w->leaf = rt;
1691 return 1;
1692 }
1693 BUG_TRAP(res!=0);
1694 }
1695 w->leaf = NULL;
1696 return 0;
1697}
1698
1699static void fib6_dump_end(struct netlink_callback *cb)
1700{
1701 struct fib6_walker_t *w = (void*)cb->args[0];
1702
1703 if (w) {
1704 cb->args[0] = 0;
1705 fib6_walker_unlink(w);
1706 kfree(w);
1707 }
efacfbcb
HX
1708 cb->done = (void*)cb->args[1];
1709 cb->args[1] = 0;
1da177e4
LT
1710}
1711
1712static int fib6_dump_done(struct netlink_callback *cb)
1713{
1714 fib6_dump_end(cb);
a8f74b22 1715 return cb->done ? cb->done(cb) : 0;
1da177e4
LT
1716}
1717
1718int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1719{
1720 struct rt6_rtnl_dump_arg arg;
1721 struct fib6_walker_t *w;
1722 int res;
1723
1724 arg.skb = skb;
1725 arg.cb = cb;
1726
1727 w = (void*)cb->args[0];
1728 if (w == NULL) {
1729 /* New dump:
1730 *
1731 * 1. hook callback destructor.
1732 */
1733 cb->args[1] = (long)cb->done;
1734 cb->done = fib6_dump_done;
1735
1736 /*
1737 * 2. allocate and initialize walker.
1738 */
9e147a1c 1739 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1da177e4
LT
1740 if (w == NULL)
1741 return -ENOMEM;
1742 RT6_TRACE("dump<%p", w);
1743 memset(w, 0, sizeof(*w));
1744 w->root = &ip6_routing_table;
1745 w->func = fib6_dump_node;
1746 w->args = &arg;
1747 cb->args[0] = (long)w;
1748 read_lock_bh(&rt6_lock);
1749 res = fib6_walk(w);
1750 read_unlock_bh(&rt6_lock);
1751 } else {
1752 w->args = &arg;
1753 read_lock_bh(&rt6_lock);
1754 res = fib6_walk_continue(w);
1755 read_unlock_bh(&rt6_lock);
1756 }
1757#if RT6_DEBUG >= 3
1758 if (res <= 0 && skb->len == 0)
1759 RT6_TRACE("%p>dump end\n", w);
1760#endif
1761 res = res < 0 ? res : skb->len;
1762 /* res < 0 is an error. (really, impossible)
1763 res == 0 means that dump is complete, but skb still can contain data.
1764 res > 0 dump is not complete, but frame is full.
1765 */
1766 /* Destroy walker, if dump of this table is complete. */
1767 if (res <= 0)
1768 fib6_dump_end(cb);
1769 return res;
1770}
1771
1772int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1773{
1774 struct rtattr **rta = arg;
1775 int iif = 0;
1776 int err = -ENOBUFS;
1777 struct sk_buff *skb;
1778 struct flowi fl;
1779 struct rt6_info *rt;
1780
1781 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1782 if (skb == NULL)
1783 goto out;
1784
1785 /* Reserve room for dummy headers, this skb can pass
1786 through good chunk of routing engine.
1787 */
1788 skb->mac.raw = skb->data;
1789 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1790
1791 memset(&fl, 0, sizeof(fl));
1792 if (rta[RTA_SRC-1])
1793 ipv6_addr_copy(&fl.fl6_src,
1794 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1795 if (rta[RTA_DST-1])
1796 ipv6_addr_copy(&fl.fl6_dst,
1797 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1798
1799 if (rta[RTA_IIF-1])
1800 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1801
1802 if (iif) {
1803 struct net_device *dev;
1804 dev = __dev_get_by_index(iif);
1805 if (!dev) {
1806 err = -ENODEV;
1807 goto out_free;
1808 }
1809 }
1810
1811 fl.oif = 0;
1812 if (rta[RTA_OIF-1])
1813 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1814
1815 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1816
1817 skb->dst = &rt->u.dst;
1818
1819 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1820 err = rt6_fill_node(skb, rt,
1821 &fl.fl6_dst, &fl.fl6_src,
1822 iif,
1823 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 1824 nlh->nlmsg_seq, 0, 0);
1da177e4
LT
1825 if (err < 0) {
1826 err = -EMSGSIZE;
1827 goto out_free;
1828 }
1829
1830 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1831 if (err > 0)
1832 err = 0;
1833out:
1834 return err;
1835out_free:
1836 kfree_skb(skb);
1837 goto out;
1838}
1839
0d51aa80
JHS
1840void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1841 struct netlink_skb_parms *req)
1da177e4
LT
1842{
1843 struct sk_buff *skb;
1844 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
0d51aa80
JHS
1845 u32 pid = current->pid;
1846 u32 seq = 0;
1da177e4 1847
0d51aa80
JHS
1848 if (req)
1849 pid = req->pid;
1850 if (nlh)
1851 seq = nlh->nlmsg_seq;
1852
1da177e4
LT
1853 skb = alloc_skb(size, gfp_any());
1854 if (!skb) {
ac6d439d 1855 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1da177e4
LT
1856 return;
1857 }
0d51aa80 1858 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1da177e4 1859 kfree_skb(skb);
ac6d439d 1860 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1da177e4
LT
1861 return;
1862 }
ac6d439d
PM
1863 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1864 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1da177e4
LT
1865}
1866
1867/*
1868 * /proc
1869 */
1870
1871#ifdef CONFIG_PROC_FS
1872
/* Size in bytes (150) of one record produced by rt6_info_route(). */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1874
/* Cursor shared by rt6_proc_info() and its callback rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* requested offset into the whole listing */
	int length;	/* output stops once len reaches this */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1883
1884static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1885{
1886 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1887 int i;
1888
1889 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1890 arg->skip++;
1891 return 0;
1892 }
1893
1894 if (arg->len >= arg->length)
1895 return 0;
1896
1897 for (i=0; i<16; i++) {
1898 sprintf(arg->buffer + arg->len, "%02x",
1899 rt->rt6i_dst.addr.s6_addr[i]);
1900 arg->len += 2;
1901 }
1902 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1903 rt->rt6i_dst.plen);
1904
1905#ifdef CONFIG_IPV6_SUBTREES
1906 for (i=0; i<16; i++) {
1907 sprintf(arg->buffer + arg->len, "%02x",
1908 rt->rt6i_src.addr.s6_addr[i]);
1909 arg->len += 2;
1910 }
1911 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1912 rt->rt6i_src.plen);
1913#else
1914 sprintf(arg->buffer + arg->len,
1915 "00000000000000000000000000000000 00 ");
1916 arg->len += 36;
1917#endif
1918
1919 if (rt->rt6i_nexthop) {
1920 for (i=0; i<16; i++) {
1921 sprintf(arg->buffer + arg->len, "%02x",
1922 rt->rt6i_nexthop->primary_key[i]);
1923 arg->len += 2;
1924 }
1925 } else {
1926 sprintf(arg->buffer + arg->len,
1927 "00000000000000000000000000000000");
1928 arg->len += 32;
1929 }
1930 arg->len += sprintf(arg->buffer + arg->len,
1931 " %08x %08x %08x %08x %8s\n",
1932 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1933 rt->u.dst.__use, rt->rt6i_flags,
1934 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1935 return 0;
1936}
1937
1938static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1939{
1940 struct rt6_proc_arg arg;
1941 arg.buffer = buffer;
1942 arg.offset = offset;
1943 arg.length = length;
1944 arg.skip = 0;
1945 arg.len = 0;
1946
1947 read_lock_bh(&rt6_lock);
1948 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1949 read_unlock_bh(&rt6_lock);
1950
1951 *start = buffer;
1952 if (offset)
1953 *start += offset % RT6_INFO_LEN;
1954
1955 arg.len -= offset % RT6_INFO_LEN;
1956
1957 if (arg.len > length)
1958 arg.len = length;
1959 if (arg.len < 0)
1960 arg.len = 0;
1961
1962 return arg.len;
1963}
1964
1da177e4
LT
1965static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1966{
1967 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1968 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1969 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1970 rt6_stats.fib_rt_cache,
1971 atomic_read(&ip6_dst_ops.entries),
1972 rt6_stats.fib_discarded_routes);
1973
1974 return 0;
1975}
1976
1977static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1978{
1979 return single_open(file, rt6_stats_seq_show, NULL);
1980}
1981
1982static struct file_operations rt6_stats_seq_fops = {
1983 .owner = THIS_MODULE,
1984 .open = rt6_stats_seq_open,
1985 .read = seq_read,
1986 .llseek = seq_lseek,
1987 .release = single_release,
1988};
1989#endif /* CONFIG_PROC_FS */
1990
1991#ifdef CONFIG_SYSCTL
1992
1993static int flush_delay;
1994
1995static
1996int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1997 void __user *buffer, size_t *lenp, loff_t *ppos)
1998{
1999 if (write) {
2000 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2001 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2002 return 0;
2003 } else
2004 return -EINVAL;
2005}
2006
2007ctl_table ipv6_route_table[] = {
2008 {
2009 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2010 .procname = "flush",
2011 .data = &flush_delay,
2012 .maxlen = sizeof(int),
89c8b3a1 2013 .mode = 0200,
1da177e4
LT
2014 .proc_handler = &ipv6_sysctl_rtcache_flush
2015 },
2016 {
2017 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2018 .procname = "gc_thresh",
2019 .data = &ip6_dst_ops.gc_thresh,
2020 .maxlen = sizeof(int),
2021 .mode = 0644,
2022 .proc_handler = &proc_dointvec,
2023 },
2024 {
2025 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2026 .procname = "max_size",
2027 .data = &ip6_rt_max_size,
2028 .maxlen = sizeof(int),
2029 .mode = 0644,
2030 .proc_handler = &proc_dointvec,
2031 },
2032 {
2033 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2034 .procname = "gc_min_interval",
2035 .data = &ip6_rt_gc_min_interval,
2036 .maxlen = sizeof(int),
2037 .mode = 0644,
2038 .proc_handler = &proc_dointvec_jiffies,
2039 .strategy = &sysctl_jiffies,
2040 },
2041 {
2042 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2043 .procname = "gc_timeout",
2044 .data = &ip6_rt_gc_timeout,
2045 .maxlen = sizeof(int),
2046 .mode = 0644,
2047 .proc_handler = &proc_dointvec_jiffies,
2048 .strategy = &sysctl_jiffies,
2049 },
2050 {
2051 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2052 .procname = "gc_interval",
2053 .data = &ip6_rt_gc_interval,
2054 .maxlen = sizeof(int),
2055 .mode = 0644,
2056 .proc_handler = &proc_dointvec_jiffies,
2057 .strategy = &sysctl_jiffies,
2058 },
2059 {
2060 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2061 .procname = "gc_elasticity",
2062 .data = &ip6_rt_gc_elasticity,
2063 .maxlen = sizeof(int),
2064 .mode = 0644,
2065 .proc_handler = &proc_dointvec_jiffies,
2066 .strategy = &sysctl_jiffies,
2067 },
2068 {
2069 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2070 .procname = "mtu_expires",
2071 .data = &ip6_rt_mtu_expires,
2072 .maxlen = sizeof(int),
2073 .mode = 0644,
2074 .proc_handler = &proc_dointvec_jiffies,
2075 .strategy = &sysctl_jiffies,
2076 },
2077 {
2078 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2079 .procname = "min_adv_mss",
2080 .data = &ip6_rt_min_advmss,
2081 .maxlen = sizeof(int),
2082 .mode = 0644,
2083 .proc_handler = &proc_dointvec_jiffies,
2084 .strategy = &sysctl_jiffies,
2085 },
2086 {
2087 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2088 .procname = "gc_min_interval_ms",
2089 .data = &ip6_rt_gc_min_interval,
2090 .maxlen = sizeof(int),
2091 .mode = 0644,
2092 .proc_handler = &proc_dointvec_ms_jiffies,
2093 .strategy = &sysctl_ms_jiffies,
2094 },
2095 { .ctl_name = 0 }
2096};
2097
2098#endif
2099
2100void __init ip6_route_init(void)
2101{
2102 struct proc_dir_entry *p;
2103
2104 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2105 sizeof(struct rt6_info),
2106 0, SLAB_HWCACHE_ALIGN,
2107 NULL, NULL);
2108 if (!ip6_dst_ops.kmem_cachep)
2109 panic("cannot create ip6_dst_cache");
2110
2111 fib6_init();
2112#ifdef CONFIG_PROC_FS
2113 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2114 if (p)
2115 p->owner = THIS_MODULE;
2116
2117 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2118#endif
2119#ifdef CONFIG_XFRM
2120 xfrm6_init();
2121#endif
2122}
2123
2124void ip6_route_cleanup(void)
2125{
2126#ifdef CONFIG_PROC_FS
2127 proc_net_remove("ipv6_route");
2128 proc_net_remove("rt6_stats");
2129#endif
2130#ifdef CONFIG_XFRM
2131 xfrm6_fini();
2132#endif
2133 rt6_ifdown(NULL);
2134 fib6_gc_cleanup();
2135 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2136}