]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[DECNet] fib: Fix out of bound access of dn_fib_props[]
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4
LT
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
8d71740c 57#include <net/netevent.h>
21713ebc 58#include <net/netlink.h>
1da177e4
LT
59
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
66/* Set to 3 to get tracing. */
67#define RT6_DEBUG 2
68
69#if RT6_DEBUG >= 3
70#define RDBG(x) printk x
71#define RT6_TRACE(x...) printk(KERN_DEBUG x)
72#else
73#define RDBG(x)
74#define RT6_TRACE(x...) do { ; } while (0)
75#endif
76
519fbd87 77#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
78
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
70ceb4f5
YH
100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
1da177e4
LT
108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
101367c2
TG
142#ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
6723ab54
DM
144static int ip6_pkt_prohibit(struct sk_buff *skb);
145static int ip6_pkt_prohibit_out(struct sk_buff *skb);
146static int ip6_pkt_blk_hole(struct sk_buff *skb);
147
101367c2
TG
148struct rt6_info ip6_prohibit_entry = {
149 .u = {
150 .dst = {
151 .__refcnt = ATOMIC_INIT(1),
152 .__use = 1,
153 .dev = &loopback_dev,
154 .obsolete = -1,
155 .error = -EACCES,
156 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
157 .input = ip6_pkt_prohibit,
158 .output = ip6_pkt_prohibit_out,
101367c2
TG
159 .ops = &ip6_dst_ops,
160 .path = (struct dst_entry*)&ip6_prohibit_entry,
161 }
162 },
163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166};
167
168struct rt6_info ip6_blk_hole_entry = {
169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
173 .dev = &loopback_dev,
174 .obsolete = -1,
175 .error = -EINVAL,
176 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
177 .input = ip6_pkt_blk_hole,
178 .output = ip6_pkt_blk_hole,
101367c2
TG
179 .ops = &ip6_dst_ops,
180 .path = (struct dst_entry*)&ip6_blk_hole_entry,
181 }
182 },
183 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
184 .rt6i_metric = ~(u32) 0,
185 .rt6i_ref = ATOMIC_INIT(1),
186};
187
188#endif
189
1da177e4
LT
190/* allocate dst with ip6_dst_ops */
191static __inline__ struct rt6_info *ip6_dst_alloc(void)
192{
193 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194}
195
196static void ip6_dst_destroy(struct dst_entry *dst)
197{
198 struct rt6_info *rt = (struct rt6_info *)dst;
199 struct inet6_dev *idev = rt->rt6i_idev;
200
201 if (idev != NULL) {
202 rt->rt6i_idev = NULL;
203 in6_dev_put(idev);
1ab1457c 204 }
1da177e4
LT
205}
206
207static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 int how)
209{
210 struct rt6_info *rt = (struct rt6_info *)dst;
211 struct inet6_dev *idev = rt->rt6i_idev;
212
213 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215 if (loopback_idev != NULL) {
216 rt->rt6i_idev = loopback_idev;
217 in6_dev_put(idev);
218 }
219 }
220}
221
222static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223{
224 return (rt->rt6i_flags & RTF_EXPIRES &&
225 time_after(jiffies, rt->rt6i_expires));
226}
227
c71099ac
TG
228static inline int rt6_need_strict(struct in6_addr *daddr)
229{
230 return (ipv6_addr_type(daddr) &
231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232}
233
1da177e4 234/*
c71099ac 235 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
236 */
237
238static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 int oif,
240 int strict)
241{
242 struct rt6_info *local = NULL;
243 struct rt6_info *sprt;
244
245 if (oif) {
7cc48263 246 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
247 struct net_device *dev = sprt->rt6i_dev;
248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
253 if (strict && oif)
254 continue;
1ab1457c 255 if (local && (!oif ||
1da177e4
LT
256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
258 }
259 local = sprt;
260 }
261 }
262
263 if (local)
264 return local;
265
266 if (strict)
267 return &ip6_null_entry;
268 }
269 return rt;
270}
271
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the next hop of @rt is not in a NUD_VALID state and the last
 * update of the neighbour entry is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the solicited-
 * node multicast address of the next hop.  A NULL @rt or a route
 * without a next hop is ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked test first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* The solicitation is sent after the neighbour lock is dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
307
1da177e4 308/*
554cfb7e 309 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 310 */
b6f99a21 311static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
312{
313 struct net_device *dev = rt->rt6i_dev;
a0d78ebf
YH
314 int ret = 0;
315
316 if (!oif)
317 return 2;
318 if (dev->flags & IFF_LOOPBACK) {
319 if (!WARN_ON(rt->rt6i_idev == NULL) &&
320 rt->rt6i_idev->dev->ifindex == oif)
321 ret = 1;
322 else
323 return 0;
324 }
325 if (dev->ifindex == oif)
554cfb7e 326 return 2;
a0d78ebf
YH
327
328 return ret;
554cfb7e 329}
1da177e4 330
b6f99a21 331static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 332{
554cfb7e
YH
333 struct neighbour *neigh = rt->rt6i_nexthop;
334 int m = 0;
4d0c5911
YH
335 if (rt->rt6i_flags & RTF_NONEXTHOP ||
336 !(rt->rt6i_flags & RTF_GATEWAY))
337 m = 1;
338 else if (neigh) {
554cfb7e
YH
339 read_lock_bh(&neigh->lock);
340 if (neigh->nud_state & NUD_VALID)
4d0c5911 341 m = 2;
ea73ee23
YH
342 else if (!(neigh->nud_state & NUD_FAILED))
343 m = 1;
554cfb7e 344 read_unlock_bh(&neigh->lock);
1da177e4 345 }
554cfb7e 346 return m;
1da177e4
LT
347}
348
554cfb7e
YH
349static int rt6_score_route(struct rt6_info *rt, int oif,
350 int strict)
1da177e4 351{
4d0c5911 352 int m, n;
1ab1457c 353
4d0c5911 354 m = rt6_check_dev(rt, oif);
77d16f45 355 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 356 return -1;
ebacaaa0
YH
357#ifdef CONFIG_IPV6_ROUTER_PREF
358 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
359#endif
4d0c5911 360 n = rt6_check_neigh(rt);
557e92ef 361 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
362 return -1;
363 return m;
364}
365
366static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
367 int strict)
368{
369 struct rt6_info *match = NULL, *last = NULL;
370 struct rt6_info *rt, *rt0 = *head;
371 u32 metric;
372 int mpri = -1;
1da177e4 373
554cfb7e
YH
374 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
375 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 376
554cfb7e 377 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 378 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
7cc48263 379 rt = rt->u.dst.rt6_next) {
554cfb7e 380 int m;
1da177e4 381
554cfb7e 382 if (rt6_check_expired(rt))
1da177e4
LT
383 continue;
384
554cfb7e
YH
385 last = rt;
386
387 m = rt6_score_route(rt, oif, strict);
388 if (m < 0)
1da177e4 389 continue;
1da177e4 390
554cfb7e 391 if (m > mpri) {
ea659e07
YH
392 if (strict & RT6_LOOKUP_F_REACHABLE)
393 rt6_probe(match);
554cfb7e 394 match = rt;
1da177e4 395 mpri = m;
ea659e07 396 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
27097255 397 rt6_probe(rt);
1da177e4
LT
398 }
399 }
400
554cfb7e 401 if (!match &&
77d16f45 402 (strict & RT6_LOOKUP_F_REACHABLE) &&
554cfb7e
YH
403 last && last != rt0) {
404 /* no entries matched; do round-robin */
34af946a 405 static DEFINE_SPINLOCK(lock);
c302e6d5 406 spin_lock(&lock);
7cc48263
ED
407 *head = rt0->u.dst.rt6_next;
408 rt0->u.dst.rt6_next = last->u.dst.rt6_next;
409 last->u.dst.rt6_next = rt0;
c302e6d5 410 spin_unlock(&lock);
1da177e4 411 }
1da177e4 412
554cfb7e
YH
413 RT6_TRACE("%s() => %p, score=%d\n",
414 __FUNCTION__, match, mpri);
1da177e4 415
554cfb7e 416 return (match ? match : &ip6_null_entry);
1da177e4
LT
417}
418
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (struct route_info) received on
 * @dev from the router @gwaddr.
 *
 * @opt: start of the option
 * @len: number of bytes available at @opt
 *
 * The option is validated, then the matching route-info route is looked
 * up.  A zero lifetime deletes an existing route; a non-zero lifetime
 * creates the route if it is missing or refreshes the preference of an
 * existing one.  The route's expiry is then set from the lifetime
 * (0xffffffff means "never expires").
 *
 * Returns 0 on success (including "nothing to do") and -EINVAL when the
 * option is malformed or must be ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	/*
	 * RFC 4191, section 2.3: if the reserved preference value is
	 * received, the Route Information Option MUST be ignored.  (Only
	 * the default-router preference in the RA header is to be treated
	 * as "medium" in that case.)
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Lifetime 0 withdraws a previously learned route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
496
982f56f3
YH
/*
 * BACKTRACK(saddr) - retry a failed lookup higher up the FIB tree.
 *
 * Expects the enclosing function to provide the variables `rt` and `fn`
 * and the labels `restart` and `out`.  Does nothing unless `rt` is
 * &ip6_null_entry.  Otherwise it climbs from `fn` towards the root,
 * descending into a parent's source subtree (looked up with `saddr`)
 * when there is one that is not the node just left.  It jumps to
 * `restart` at the first node carrying RTN_RTINFO, or to `out` once a
 * node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(saddr) \
do { \
	struct fib6_node *pn; \
	if (rt != &ip6_null_entry) \
		break; \
	for (;;) { \
		if (fn->fn_flags & RTN_TL_ROOT) \
			goto out; \
		pn = fn->parent; \
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
		else \
			fn = pn; \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
} while(0)
c71099ac
TG
514
515static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
516 struct flowi *fl, int flags)
1da177e4
LT
517{
518 struct fib6_node *fn;
519 struct rt6_info *rt;
520
c71099ac
TG
521 read_lock_bh(&table->tb6_lock);
522 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
523restart:
524 rt = fn->leaf;
77d16f45 525 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 526 BACKTRACK(&fl->fl6_src);
c71099ac 527out:
33cc4896 528 dst_hold(&rt->u.dst);
c71099ac 529 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
530
531 rt->u.dst.lastuse = jiffies;
c71099ac
TG
532 rt->u.dst.__use++;
533
534 return rt;
535
536}
537
538struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
539 int oif, int strict)
540{
541 struct flowi fl = {
542 .oif = oif,
543 .nl_u = {
544 .ip6_u = {
545 .daddr = *daddr,
c71099ac
TG
546 },
547 },
548 };
549 struct dst_entry *dst;
77d16f45 550 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 551
adaa70bb
TG
552 if (saddr) {
553 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
554 flags |= RT6_LOOKUP_F_HAS_SADDR;
555 }
556
c71099ac
TG
557 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
558 if (dst->error == 0)
559 return (struct rt6_info *) dst;
560
561 dst_release(dst);
562
1da177e4
LT
563 return NULL;
564}
565
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
571
86872cb5 572static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
573{
574 int err;
c71099ac 575 struct fib6_table *table;
1da177e4 576
c71099ac
TG
577 table = rt->rt6i_table;
578 write_lock_bh(&table->tb6_lock);
86872cb5 579 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 580 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
581
582 return err;
583}
584
40e22e8f
TG
585int ip6_ins_rt(struct rt6_info *rt)
586{
86872cb5 587 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
588}
589
95a9a5ba
YH
590static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
591 struct in6_addr *saddr)
1da177e4 592{
1da177e4
LT
593 struct rt6_info *rt;
594
595 /*
596 * Clone the route.
597 */
598
599 rt = ip6_rt_copy(ort);
600
601 if (rt) {
58c4fb86
YH
602 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
603 if (rt->rt6i_dst.plen != 128 &&
604 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
605 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 606 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 607 }
1da177e4 608
58c4fb86 609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
610 rt->rt6i_dst.plen = 128;
611 rt->rt6i_flags |= RTF_CACHE;
612 rt->u.dst.flags |= DST_HOST;
613
614#ifdef CONFIG_IPV6_SUBTREES
615 if (rt->rt6i_src.plen && saddr) {
616 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
617 rt->rt6i_src.plen = 128;
618 }
619#endif
620
621 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
622
95a9a5ba 623 }
1da177e4 624
95a9a5ba
YH
625 return rt;
626}
1da177e4 627
299d9939
YH
628static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
629{
630 struct rt6_info *rt = ip6_rt_copy(ort);
631 if (rt) {
632 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
633 rt->rt6i_dst.plen = 128;
634 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
635 rt->u.dst.flags |= DST_HOST;
636 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
637 }
638 return rt;
639}
640
8ce11e6a
AB
641static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
642 struct flowi *fl, int flags)
1da177e4
LT
643{
644 struct fib6_node *fn;
519fbd87 645 struct rt6_info *rt, *nrt;
c71099ac 646 int strict = 0;
1da177e4 647 int attempts = 3;
519fbd87 648 int err;
ea659e07 649 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 650
77d16f45 651 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
652
653relookup:
c71099ac 654 read_lock_bh(&table->tb6_lock);
1da177e4 655
8238dd06 656restart_2:
c71099ac 657 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
658
659restart:
c71099ac 660 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
982f56f3 661 BACKTRACK(&fl->fl6_src);
8238dd06
YH
662 if (rt == &ip6_null_entry ||
663 rt->rt6i_flags & RTF_CACHE)
1ddef044 664 goto out;
1da177e4 665
fb9de91e 666 dst_hold(&rt->u.dst);
c71099ac 667 read_unlock_bh(&table->tb6_lock);
fb9de91e 668
519fbd87 669 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 670 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
671 else {
672#if CLONE_OFFLINK_ROUTE
c71099ac 673 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
674#else
675 goto out2;
676#endif
677 }
e40cf353 678
519fbd87
YH
679 dst_release(&rt->u.dst);
680 rt = nrt ? : &ip6_null_entry;
1da177e4 681
519fbd87
YH
682 dst_hold(&rt->u.dst);
683 if (nrt) {
40e22e8f 684 err = ip6_ins_rt(nrt);
519fbd87 685 if (!err)
1da177e4 686 goto out2;
1da177e4 687 }
1da177e4 688
519fbd87
YH
689 if (--attempts <= 0)
690 goto out2;
691
692 /*
c71099ac 693 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
694 * released someone could insert this route. Relookup.
695 */
696 dst_release(&rt->u.dst);
697 goto relookup;
698
699out:
8238dd06
YH
700 if (reachable) {
701 reachable = 0;
702 goto restart_2;
703 }
519fbd87 704 dst_hold(&rt->u.dst);
c71099ac 705 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
706out2:
707 rt->u.dst.lastuse = jiffies;
708 rt->u.dst.__use++;
c71099ac
TG
709
710 return rt;
1da177e4
LT
711}
712
c71099ac
TG
713void ip6_route_input(struct sk_buff *skb)
714{
715 struct ipv6hdr *iph = skb->nh.ipv6h;
adaa70bb 716 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
717 struct flowi fl = {
718 .iif = skb->dev->ifindex,
719 .nl_u = {
720 .ip6_u = {
721 .daddr = iph->daddr,
722 .saddr = iph->saddr,
90bcaf7b 723 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
724 },
725 },
1ab1457c 726 .mark = skb->mark,
c71099ac
TG
727 .proto = iph->nexthdr,
728 };
adaa70bb
TG
729
730 if (rt6_need_strict(&iph->daddr))
731 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
732
733 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
734}
735
736static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
737 struct flowi *fl, int flags)
1da177e4
LT
738{
739 struct fib6_node *fn;
519fbd87 740 struct rt6_info *rt, *nrt;
c71099ac 741 int strict = 0;
1da177e4 742 int attempts = 3;
519fbd87 743 int err;
ea659e07 744 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 745
77d16f45 746 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
747
748relookup:
c71099ac 749 read_lock_bh(&table->tb6_lock);
1da177e4 750
8238dd06 751restart_2:
c71099ac 752 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
753
754restart:
8238dd06 755 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
982f56f3 756 BACKTRACK(&fl->fl6_src);
8238dd06
YH
757 if (rt == &ip6_null_entry ||
758 rt->rt6i_flags & RTF_CACHE)
1da177e4 759 goto out;
1da177e4 760
fb9de91e 761 dst_hold(&rt->u.dst);
c71099ac 762 read_unlock_bh(&table->tb6_lock);
fb9de91e 763
519fbd87 764 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 765 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
766 else {
767#if CLONE_OFFLINK_ROUTE
768 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
769#else
770 goto out2;
771#endif
772 }
1da177e4 773
519fbd87
YH
774 dst_release(&rt->u.dst);
775 rt = nrt ? : &ip6_null_entry;
1da177e4 776
519fbd87
YH
777 dst_hold(&rt->u.dst);
778 if (nrt) {
40e22e8f 779 err = ip6_ins_rt(nrt);
519fbd87 780 if (!err)
1da177e4 781 goto out2;
1da177e4 782 }
e40cf353 783
519fbd87
YH
784 if (--attempts <= 0)
785 goto out2;
786
787 /*
c71099ac 788 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
789 * released someone could insert this route. Relookup.
790 */
791 dst_release(&rt->u.dst);
792 goto relookup;
793
794out:
8238dd06
YH
795 if (reachable) {
796 reachable = 0;
797 goto restart_2;
798 }
519fbd87 799 dst_hold(&rt->u.dst);
c71099ac 800 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
801out2:
802 rt->u.dst.lastuse = jiffies;
803 rt->u.dst.__use++;
c71099ac
TG
804 return rt;
805}
806
807struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
808{
809 int flags = 0;
810
811 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 812 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 813
adaa70bb
TG
814 if (!ipv6_addr_any(&fl->fl6_src))
815 flags |= RT6_LOOKUP_F_HAS_SADDR;
816
c71099ac 817 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
818}
819
820
821/*
822 * Destination cache support functions
823 */
824
825static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
826{
827 struct rt6_info *rt;
828
829 rt = (struct rt6_info *) dst;
830
831 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
832 return dst;
833
834 return NULL;
835}
836
837static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
838{
839 struct rt6_info *rt = (struct rt6_info *) dst;
840
841 if (rt) {
842 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 843 ip6_del_rt(rt);
1da177e4
LT
844 else
845 dst_release(dst);
846 }
847 return NULL;
848}
849
850static void ip6_link_failure(struct sk_buff *skb)
851{
852 struct rt6_info *rt;
853
854 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
855
856 rt = (struct rt6_info *) skb->dst;
857 if (rt) {
858 if (rt->rt6i_flags&RTF_CACHE) {
859 dst_set_expires(&rt->u.dst, 0);
860 rt->rt6i_flags |= RTF_EXPIRES;
861 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
862 rt->rt6i_node->fn_sernum = -1;
863 }
864}
865
866static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
867{
868 struct rt6_info *rt6 = (struct rt6_info*)dst;
869
870 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
871 rt6->rt6i_flags |= RTF_MODIFIED;
872 if (mtu < IPV6_MIN_MTU) {
873 mtu = IPV6_MIN_MTU;
874 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
875 }
876 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 877 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
878 }
879}
880
1da177e4
LT
881static int ipv6_get_mtu(struct net_device *dev);
882
883static inline unsigned int ipv6_advmss(unsigned int mtu)
884{
885 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
886
887 if (mtu < ip6_rt_min_advmss)
888 mtu = ip6_rt_min_advmss;
889
890 /*
1ab1457c
YH
891 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
892 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
893 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
894 * rely only on pmtu discovery"
895 */
896 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
897 mtu = IPV6_MAXPLEN;
898 return mtu;
899}
900
5d0bbeeb 901static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 902static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 903
1ab1457c 904struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
1da177e4
LT
905 struct neighbour *neigh,
906 struct in6_addr *addr,
907 int (*output)(struct sk_buff *))
908{
909 struct rt6_info *rt;
910 struct inet6_dev *idev = in6_dev_get(dev);
911
912 if (unlikely(idev == NULL))
913 return NULL;
914
915 rt = ip6_dst_alloc();
916 if (unlikely(rt == NULL)) {
917 in6_dev_put(idev);
918 goto out;
919 }
920
921 dev_hold(dev);
922 if (neigh)
923 neigh_hold(neigh);
924 else
925 neigh = ndisc_get_neigh(dev, addr);
926
927 rt->rt6i_dev = dev;
928 rt->rt6i_idev = idev;
929 rt->rt6i_nexthop = neigh;
930 atomic_set(&rt->u.dst.__refcnt, 1);
931 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
932 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
933 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
934 rt->u.dst.output = output;
935
936#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
937 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
938 ? DST_HOST
1da177e4
LT
939 : 0;
940 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
941 rt->rt6i_dst.plen = 128;
942#endif
943
5d0bbeeb 944 spin_lock_bh(&ndisc_lock);
1da177e4
LT
945 rt->u.dst.next = ndisc_dst_gc_list;
946 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 947 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
948
949 fib6_force_start_gc();
950
951out:
40aa7b90 952 return &rt->u.dst;
1da177e4
LT
953}
954
955int ndisc_dst_gc(int *more)
956{
957 struct dst_entry *dst, *next, **pprev;
958 int freed;
959
960 next = NULL;
1ab1457c 961 freed = 0;
5d0bbeeb
TG
962
963 spin_lock_bh(&ndisc_lock);
1da177e4 964 pprev = &ndisc_dst_gc_list;
5d0bbeeb 965
1da177e4
LT
966 while ((dst = *pprev) != NULL) {
967 if (!atomic_read(&dst->__refcnt)) {
968 *pprev = dst->next;
969 dst_free(dst);
970 freed++;
971 } else {
972 pprev = &dst->next;
973 (*more)++;
974 }
975 }
976
5d0bbeeb
TG
977 spin_unlock_bh(&ndisc_lock);
978
1da177e4
LT
979 return freed;
980}
981
982static int ip6_dst_gc(void)
983{
984 static unsigned expire = 30*HZ;
985 static unsigned long last_gc;
986 unsigned long now = jiffies;
987
988 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
989 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
990 goto out;
991
992 expire++;
993 fib6_run_gc(expire);
994 last_gc = now;
995 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
996 expire = ip6_rt_gc_timeout>>1;
997
998out:
999 expire -= expire>>ip6_rt_gc_elasticity;
1000 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1001}
1002
1003/* Clean host part of a prefix. Not necessary in radix tree,
1004 but results in cleaner routing tables.
1005
1006 Remove it only when all the things will work!
1007 */
1008
1009static int ipv6_get_mtu(struct net_device *dev)
1010{
1011 int mtu = IPV6_MIN_MTU;
1012 struct inet6_dev *idev;
1013
1014 idev = in6_dev_get(dev);
1015 if (idev) {
1016 mtu = idev->cnf.mtu6;
1017 in6_dev_put(idev);
1018 }
1019 return mtu;
1020}
1021
1022int ipv6_get_hoplimit(struct net_device *dev)
1023{
1024 int hoplimit = ipv6_devconf.hop_limit;
1025 struct inet6_dev *idev;
1026
1027 idev = in6_dev_get(dev);
1028 if (idev) {
1029 hoplimit = idev->cnf.hop_limit;
1030 in6_dev_put(idev);
1031 }
1032 return hoplimit;
1033}
1034
1035/*
1036 *
1037 */
1038
86872cb5 1039int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1040{
1041 int err;
1da177e4
LT
1042 struct rt6_info *rt = NULL;
1043 struct net_device *dev = NULL;
1044 struct inet6_dev *idev = NULL;
c71099ac 1045 struct fib6_table *table;
1da177e4
LT
1046 int addr_type;
1047
86872cb5 1048 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1049 return -EINVAL;
1050#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1051 if (cfg->fc_src_len)
1da177e4
LT
1052 return -EINVAL;
1053#endif
86872cb5 1054 if (cfg->fc_ifindex) {
1da177e4 1055 err = -ENODEV;
86872cb5 1056 dev = dev_get_by_index(cfg->fc_ifindex);
1da177e4
LT
1057 if (!dev)
1058 goto out;
1059 idev = in6_dev_get(dev);
1060 if (!idev)
1061 goto out;
1062 }
1063
86872cb5
TG
1064 if (cfg->fc_metric == 0)
1065 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1066
86872cb5 1067 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1068 if (table == NULL) {
1069 err = -ENOBUFS;
1070 goto out;
1071 }
1072
1da177e4
LT
1073 rt = ip6_dst_alloc();
1074
1075 if (rt == NULL) {
1076 err = -ENOMEM;
1077 goto out;
1078 }
1079
1080 rt->u.dst.obsolete = -1;
86872cb5 1081 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1082
86872cb5
TG
1083 if (cfg->fc_protocol == RTPROT_UNSPEC)
1084 cfg->fc_protocol = RTPROT_BOOT;
1085 rt->rt6i_protocol = cfg->fc_protocol;
1086
1087 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1088
1089 if (addr_type & IPV6_ADDR_MULTICAST)
1090 rt->u.dst.input = ip6_mc_input;
1091 else
1092 rt->u.dst.input = ip6_forward;
1093
1094 rt->u.dst.output = ip6_output;
1095
86872cb5
TG
1096 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1097 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1098 if (rt->rt6i_dst.plen == 128)
1099 rt->u.dst.flags = DST_HOST;
1100
1101#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1102 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1103 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1104#endif
1105
86872cb5 1106 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1107
1108 /* We cannot add true routes via loopback here,
1109 they would result in kernel looping; promote them to reject routes
1110 */
86872cb5 1111 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1112 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1113 /* hold loopback dev/idev if we haven't done so. */
1114 if (dev != &loopback_dev) {
1115 if (dev) {
1116 dev_put(dev);
1117 in6_dev_put(idev);
1118 }
1119 dev = &loopback_dev;
1120 dev_hold(dev);
1121 idev = in6_dev_get(dev);
1122 if (!idev) {
1123 err = -ENODEV;
1124 goto out;
1125 }
1126 }
1127 rt->u.dst.output = ip6_pkt_discard_out;
1128 rt->u.dst.input = ip6_pkt_discard;
1129 rt->u.dst.error = -ENETUNREACH;
1130 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1131 goto install_route;
1132 }
1133
86872cb5 1134 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1135 struct in6_addr *gw_addr;
1136 int gwa_type;
1137
86872cb5
TG
1138 gw_addr = &cfg->fc_gateway;
1139 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1140 gwa_type = ipv6_addr_type(gw_addr);
1141
1142 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1143 struct rt6_info *grt;
1144
1145 /* IPv6 strictly inhibits using not link-local
1146 addresses as nexthop address.
1147 Otherwise, router will not able to send redirects.
1148 It is very good, but in some (rare!) circumstances
1149 (SIT, PtP, NBMA NOARP links) it is handy to allow
1150 some exceptions. --ANK
1151 */
1152 err = -EINVAL;
1153 if (!(gwa_type&IPV6_ADDR_UNICAST))
1154 goto out;
1155
86872cb5 1156 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1157
1158 err = -EHOSTUNREACH;
1159 if (grt == NULL)
1160 goto out;
1161 if (dev) {
1162 if (dev != grt->rt6i_dev) {
1163 dst_release(&grt->u.dst);
1164 goto out;
1165 }
1166 } else {
1167 dev = grt->rt6i_dev;
1168 idev = grt->rt6i_idev;
1169 dev_hold(dev);
1170 in6_dev_hold(grt->rt6i_idev);
1171 }
1172 if (!(grt->rt6i_flags&RTF_GATEWAY))
1173 err = 0;
1174 dst_release(&grt->u.dst);
1175
1176 if (err)
1177 goto out;
1178 }
1179 err = -EINVAL;
1180 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1181 goto out;
1182 }
1183
1184 err = -ENODEV;
1185 if (dev == NULL)
1186 goto out;
1187
86872cb5 1188 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1189 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1190 if (IS_ERR(rt->rt6i_nexthop)) {
1191 err = PTR_ERR(rt->rt6i_nexthop);
1192 rt->rt6i_nexthop = NULL;
1193 goto out;
1194 }
1195 }
1196
86872cb5 1197 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1198
1199install_route:
86872cb5
TG
1200 if (cfg->fc_mx) {
1201 struct nlattr *nla;
1202 int remaining;
1203
1204 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1205 int type = nla->nla_type;
1206
1207 if (type) {
1208 if (type > RTAX_MAX) {
1da177e4
LT
1209 err = -EINVAL;
1210 goto out;
1211 }
86872cb5
TG
1212
1213 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1214 }
1da177e4
LT
1215 }
1216 }
1217
1218 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1219 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1220 if (!rt->u.dst.metrics[RTAX_MTU-1])
1221 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1222 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1223 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1224 rt->u.dst.dev = dev;
1225 rt->rt6i_idev = idev;
c71099ac 1226 rt->rt6i_table = table;
86872cb5 1227 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1228
1229out:
1230 if (dev)
1231 dev_put(dev);
1232 if (idev)
1233 in6_dev_put(idev);
1234 if (rt)
40aa7b90 1235 dst_free(&rt->u.dst);
1da177e4
LT
1236 return err;
1237}
1238
86872cb5 1239static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1240{
1241 int err;
c71099ac 1242 struct fib6_table *table;
1da177e4 1243
6c813a72
PM
1244 if (rt == &ip6_null_entry)
1245 return -ENOENT;
1246
c71099ac
TG
1247 table = rt->rt6i_table;
1248 write_lock_bh(&table->tb6_lock);
1da177e4 1249
86872cb5 1250 err = fib6_del(rt, info);
1da177e4
LT
1251 dst_release(&rt->u.dst);
1252
c71099ac 1253 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1254
1255 return err;
1256}
1257
e0a1ad73
TG
1258int ip6_del_rt(struct rt6_info *rt)
1259{
86872cb5 1260 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1261}
1262
86872cb5 1263static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1264{
c71099ac 1265 struct fib6_table *table;
1da177e4
LT
1266 struct fib6_node *fn;
1267 struct rt6_info *rt;
1268 int err = -ESRCH;
1269
86872cb5 1270 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1271 if (table == NULL)
1272 return err;
1273
1274 read_lock_bh(&table->tb6_lock);
1da177e4 1275
c71099ac 1276 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1277 &cfg->fc_dst, cfg->fc_dst_len,
1278 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1279
1da177e4 1280 if (fn) {
7cc48263 1281 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1282 if (cfg->fc_ifindex &&
1da177e4 1283 (rt->rt6i_dev == NULL ||
86872cb5 1284 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1285 continue;
86872cb5
TG
1286 if (cfg->fc_flags & RTF_GATEWAY &&
1287 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1288 continue;
86872cb5 1289 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1290 continue;
1291 dst_hold(&rt->u.dst);
c71099ac 1292 read_unlock_bh(&table->tb6_lock);
1da177e4 1293
86872cb5 1294 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1295 }
1296 }
c71099ac 1297 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1298
1299 return err;
1300}
1301
1302/*
1303 * Handle redirects
1304 */
a6279458
YH
1305struct ip6rd_flowi {
1306 struct flowi fl;
1307 struct in6_addr gateway;
1308};
1309
1310static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1311 struct flowi *fl,
1312 int flags)
1da177e4 1313{
a6279458
YH
1314 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1315 struct rt6_info *rt;
e843b9e1 1316 struct fib6_node *fn;
c71099ac 1317
1da177e4 1318 /*
e843b9e1
YH
1319 * Get the "current" route for this destination and
1320 * check if the redirect has come from approriate router.
1321 *
1322 * RFC 2461 specifies that redirects should only be
1323 * accepted if they come from the nexthop to the target.
1324 * Due to the way the routes are chosen, this notion
1325 * is a bit fuzzy and one might need to check all possible
1326 * routes.
1da177e4 1327 */
1da177e4 1328
c71099ac 1329 read_lock_bh(&table->tb6_lock);
a6279458 1330 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1331restart:
7cc48263 1332 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1333 /*
1334 * Current route is on-link; redirect is always invalid.
1335 *
1336 * Seems, previous statement is not true. It could
1337 * be node, which looks for us as on-link (f.e. proxy ndisc)
1338 * But then router serving it might decide, that we should
1339 * know truth 8)8) --ANK (980726).
1340 */
1341 if (rt6_check_expired(rt))
1342 continue;
1343 if (!(rt->rt6i_flags & RTF_GATEWAY))
1344 continue;
a6279458 1345 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1346 continue;
a6279458 1347 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1348 continue;
1349 break;
1350 }
a6279458 1351
cb15d9c2 1352 if (!rt)
a6279458 1353 rt = &ip6_null_entry;
cb15d9c2
YH
1354 BACKTRACK(&fl->fl6_src);
1355out:
a6279458
YH
1356 dst_hold(&rt->u.dst);
1357
c71099ac 1358 read_unlock_bh(&table->tb6_lock);
e843b9e1 1359
a6279458
YH
1360 return rt;
1361};
1362
1363static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1364 struct in6_addr *src,
1365 struct in6_addr *gateway,
1366 struct net_device *dev)
1367{
adaa70bb 1368 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1369 struct ip6rd_flowi rdfl = {
1370 .fl = {
1371 .oif = dev->ifindex,
1372 .nl_u = {
1373 .ip6_u = {
1374 .daddr = *dest,
1375 .saddr = *src,
1376 },
1377 },
1378 },
1379 .gateway = *gateway,
1380 };
adaa70bb
TG
1381
1382 if (rt6_need_strict(dest))
1383 flags |= RT6_LOOKUP_F_IFACE;
a6279458
YH
1384
1385 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1386}
1387
1388void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1389 struct in6_addr *saddr,
1390 struct neighbour *neigh, u8 *lladdr, int on_link)
1391{
1392 struct rt6_info *rt, *nrt = NULL;
1393 struct netevent_redirect netevent;
1394
1395 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1396
1397 if (rt == &ip6_null_entry) {
1da177e4
LT
1398 if (net_ratelimit())
1399 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1400 "for redirect target\n");
a6279458 1401 goto out;
1da177e4
LT
1402 }
1403
1da177e4
LT
1404 /*
1405 * We have finally decided to accept it.
1406 */
1407
1ab1457c 1408 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1409 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1410 NEIGH_UPDATE_F_OVERRIDE|
1411 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1412 NEIGH_UPDATE_F_ISROUTER))
1413 );
1414
1415 /*
1416 * Redirect received -> path was valid.
1417 * Look, redirects are sent only in response to data packets,
1418 * so that this nexthop apparently is reachable. --ANK
1419 */
1420 dst_confirm(&rt->u.dst);
1421
1422 /* Duplicate redirect: silently ignore. */
1423 if (neigh == rt->u.dst.neighbour)
1424 goto out;
1425
1426 nrt = ip6_rt_copy(rt);
1427 if (nrt == NULL)
1428 goto out;
1429
1430 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1431 if (on_link)
1432 nrt->rt6i_flags &= ~RTF_GATEWAY;
1433
1434 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1435 nrt->rt6i_dst.plen = 128;
1436 nrt->u.dst.flags |= DST_HOST;
1437
1438 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1439 nrt->rt6i_nexthop = neigh_clone(neigh);
1440 /* Reset pmtu, it may be better */
1441 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1442 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1443
40e22e8f 1444 if (ip6_ins_rt(nrt))
1da177e4
LT
1445 goto out;
1446
8d71740c
TT
1447 netevent.old = &rt->u.dst;
1448 netevent.new = &nrt->u.dst;
1449 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1450
1da177e4 1451 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1452 ip6_del_rt(rt);
1da177e4
LT
1453 return;
1454 }
1455
1456out:
1ab1457c 1457 dst_release(&rt->u.dst);
1da177e4
LT
1458 return;
1459}
1460
1461/*
1462 * Handle ICMP "packet too big" messages
1463 * i.e. Path MTU discovery
1464 */
1465
1466void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1467 struct net_device *dev, u32 pmtu)
1468{
1469 struct rt6_info *rt, *nrt;
1470 int allfrag = 0;
1471
1472 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1473 if (rt == NULL)
1474 return;
1475
1476 if (pmtu >= dst_mtu(&rt->u.dst))
1477 goto out;
1478
1479 if (pmtu < IPV6_MIN_MTU) {
1480 /*
1ab1457c 1481 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1482 * MTU (1280) and a fragment header should always be included
1483 * after a node receiving Too Big message reporting PMTU is
1484 * less than the IPv6 Minimum Link MTU.
1485 */
1486 pmtu = IPV6_MIN_MTU;
1487 allfrag = 1;
1488 }
1489
1490 /* New mtu received -> path was valid.
1491 They are sent only in response to data packets,
1492 so that this nexthop apparently is reachable. --ANK
1493 */
1494 dst_confirm(&rt->u.dst);
1495
1496 /* Host route. If it is static, it would be better
1497 not to override it, but add new one, so that
1498 when cache entry will expire old pmtu
1499 would return automatically.
1500 */
1501 if (rt->rt6i_flags & RTF_CACHE) {
1502 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1503 if (allfrag)
1504 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1505 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1506 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1507 goto out;
1508 }
1509
1510 /* Network route.
1511 Two cases are possible:
1512 1. It is connected route. Action: COW
1513 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1514 */
d5315b50 1515 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1516 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1517 else
1518 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1519
d5315b50 1520 if (nrt) {
a1e78363
YH
1521 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1522 if (allfrag)
1523 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1524
1525 /* According to RFC 1981, detecting PMTU increase shouldn't be
1526 * happened within 5 mins, the recommended timer is 10 mins.
1527 * Here this route expiration time is set to ip6_rt_mtu_expires
1528 * which is 10 mins. After 10 mins the decreased pmtu is expired
1529 * and detecting PMTU increase will be automatically happened.
1530 */
1531 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1532 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1533
40e22e8f 1534 ip6_ins_rt(nrt);
1da177e4 1535 }
1da177e4
LT
1536out:
1537 dst_release(&rt->u.dst);
1538}
1539
1540/*
1541 * Misc support functions
1542 */
1543
1544static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1545{
1546 struct rt6_info *rt = ip6_dst_alloc();
1547
1548 if (rt) {
1549 rt->u.dst.input = ort->u.dst.input;
1550 rt->u.dst.output = ort->u.dst.output;
1551
1552 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1553 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1554 rt->u.dst.dev = ort->u.dst.dev;
1555 if (rt->u.dst.dev)
1556 dev_hold(rt->u.dst.dev);
1557 rt->rt6i_idev = ort->rt6i_idev;
1558 if (rt->rt6i_idev)
1559 in6_dev_hold(rt->rt6i_idev);
1560 rt->u.dst.lastuse = jiffies;
1561 rt->rt6i_expires = 0;
1562
1563 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1564 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1565 rt->rt6i_metric = 0;
1566
1567 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1568#ifdef CONFIG_IPV6_SUBTREES
1569 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1570#endif
c71099ac 1571 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1572 }
1573 return rt;
1574}
1575
70ceb4f5
YH
1576#ifdef CONFIG_IPV6_ROUTE_INFO
1577static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1578 struct in6_addr *gwaddr, int ifindex)
1579{
1580 struct fib6_node *fn;
1581 struct rt6_info *rt = NULL;
c71099ac
TG
1582 struct fib6_table *table;
1583
1584 table = fib6_get_table(RT6_TABLE_INFO);
1585 if (table == NULL)
1586 return NULL;
70ceb4f5 1587
c71099ac
TG
1588 write_lock_bh(&table->tb6_lock);
1589 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1590 if (!fn)
1591 goto out;
1592
7cc48263 1593 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1594 if (rt->rt6i_dev->ifindex != ifindex)
1595 continue;
1596 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1597 continue;
1598 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1599 continue;
1600 dst_hold(&rt->u.dst);
1601 break;
1602 }
1603out:
c71099ac 1604 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1605 return rt;
1606}
1607
1608static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1609 struct in6_addr *gwaddr, int ifindex,
1610 unsigned pref)
1611{
86872cb5
TG
1612 struct fib6_config cfg = {
1613 .fc_table = RT6_TABLE_INFO,
1614 .fc_metric = 1024,
1615 .fc_ifindex = ifindex,
1616 .fc_dst_len = prefixlen,
1617 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1618 RTF_UP | RTF_PREF(pref),
1619 };
1620
1621 ipv6_addr_copy(&cfg.fc_dst, prefix);
1622 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1623
e317da96
YH
1624 /* We should treat it as a default route if prefix length is 0. */
1625 if (!prefixlen)
86872cb5 1626 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1627
86872cb5 1628 ip6_route_add(&cfg);
70ceb4f5
YH
1629
1630 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1631}
1632#endif
1633
1da177e4 1634struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1635{
1da177e4 1636 struct rt6_info *rt;
c71099ac 1637 struct fib6_table *table;
1da177e4 1638
c71099ac
TG
1639 table = fib6_get_table(RT6_TABLE_DFLT);
1640 if (table == NULL)
1641 return NULL;
1da177e4 1642
c71099ac 1643 write_lock_bh(&table->tb6_lock);
7cc48263 1644 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1645 if (dev == rt->rt6i_dev &&
045927ff 1646 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1647 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1648 break;
1649 }
1650 if (rt)
1651 dst_hold(&rt->u.dst);
c71099ac 1652 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1653 return rt;
1654}
1655
1656struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1657 struct net_device *dev,
1658 unsigned int pref)
1da177e4 1659{
86872cb5
TG
1660 struct fib6_config cfg = {
1661 .fc_table = RT6_TABLE_DFLT,
1662 .fc_metric = 1024,
1663 .fc_ifindex = dev->ifindex,
1664 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1665 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1666 };
1da177e4 1667
86872cb5 1668 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1669
86872cb5 1670 ip6_route_add(&cfg);
1da177e4 1671
1da177e4
LT
1672 return rt6_get_dflt_router(gwaddr, dev);
1673}
1674
1675void rt6_purge_dflt_routers(void)
1676{
1677 struct rt6_info *rt;
c71099ac
TG
1678 struct fib6_table *table;
1679
1680 /* NOTE: Keep consistent with rt6_get_dflt_router */
1681 table = fib6_get_table(RT6_TABLE_DFLT);
1682 if (table == NULL)
1683 return;
1da177e4
LT
1684
1685restart:
c71099ac 1686 read_lock_bh(&table->tb6_lock);
7cc48263 1687 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1688 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1689 dst_hold(&rt->u.dst);
c71099ac 1690 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1691 ip6_del_rt(rt);
1da177e4
LT
1692 goto restart;
1693 }
1694 }
c71099ac 1695 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1696}
1697
86872cb5
TG
1698static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1699 struct fib6_config *cfg)
1700{
1701 memset(cfg, 0, sizeof(*cfg));
1702
1703 cfg->fc_table = RT6_TABLE_MAIN;
1704 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1705 cfg->fc_metric = rtmsg->rtmsg_metric;
1706 cfg->fc_expires = rtmsg->rtmsg_info;
1707 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1708 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1709 cfg->fc_flags = rtmsg->rtmsg_flags;
1710
1711 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1712 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1713 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1714}
1715
1da177e4
LT
1716int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1717{
86872cb5 1718 struct fib6_config cfg;
1da177e4
LT
1719 struct in6_rtmsg rtmsg;
1720 int err;
1721
1722 switch(cmd) {
1723 case SIOCADDRT: /* Add a route */
1724 case SIOCDELRT: /* Delete a route */
1725 if (!capable(CAP_NET_ADMIN))
1726 return -EPERM;
1727 err = copy_from_user(&rtmsg, arg,
1728 sizeof(struct in6_rtmsg));
1729 if (err)
1730 return -EFAULT;
86872cb5
TG
1731
1732 rtmsg_to_fib6_config(&rtmsg, &cfg);
1733
1da177e4
LT
1734 rtnl_lock();
1735 switch (cmd) {
1736 case SIOCADDRT:
86872cb5 1737 err = ip6_route_add(&cfg);
1da177e4
LT
1738 break;
1739 case SIOCDELRT:
86872cb5 1740 err = ip6_route_del(&cfg);
1da177e4
LT
1741 break;
1742 default:
1743 err = -EINVAL;
1744 }
1745 rtnl_unlock();
1746
1747 return err;
1748 };
1749
1750 return -EINVAL;
1751}
1752
1753/*
1754 * Drop the packet on the floor
1755 */
1756
9ce8ade0 1757static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
1da177e4 1758{
76d0cc1b
LL
1759 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1760 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
a11d206d 1761 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
76d0cc1b 1762
a11d206d 1763 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
9ce8ade0 1764 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1765 kfree_skb(skb);
1766 return 0;
1767}
1768
9ce8ade0
TG
1769static int ip6_pkt_discard(struct sk_buff *skb)
1770{
1771 return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1772}
1773
20380731 1774static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1775{
1776 skb->dev = skb->dst->dev;
1777 return ip6_pkt_discard(skb);
1778}
1779
6723ab54
DM
1780#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1781
9ce8ade0
TG
1782static int ip6_pkt_prohibit(struct sk_buff *skb)
1783{
1784 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1785}
1786
1787static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1788{
1789 skb->dev = skb->dst->dev;
1790 return ip6_pkt_prohibit(skb);
1791}
1792
/* Free @skb without counting it or sending any ICMPv6 error; returns 0. */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1798
6723ab54
DM
1799#endif
1800
1da177e4
LT
1801/*
1802 * Allocate a dst for local (unicast / anycast) address.
1803 */
1804
1805struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1806 const struct in6_addr *addr,
1807 int anycast)
1808{
1809 struct rt6_info *rt = ip6_dst_alloc();
1810
1811 if (rt == NULL)
1812 return ERR_PTR(-ENOMEM);
1813
1814 dev_hold(&loopback_dev);
1815 in6_dev_hold(idev);
1816
1817 rt->u.dst.flags = DST_HOST;
1818 rt->u.dst.input = ip6_input;
1819 rt->u.dst.output = ip6_output;
1820 rt->rt6i_dev = &loopback_dev;
1821 rt->rt6i_idev = idev;
1822 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1823 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1824 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1825 rt->u.dst.obsolete = -1;
1826
1827 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1828 if (anycast)
1829 rt->rt6i_flags |= RTF_ANYCAST;
1830 else
1da177e4
LT
1831 rt->rt6i_flags |= RTF_LOCAL;
1832 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1833 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1834 dst_free(&rt->u.dst);
1da177e4
LT
1835 return ERR_PTR(-ENOMEM);
1836 }
1837
1838 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1839 rt->rt6i_dst.plen = 128;
c71099ac 1840 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1841
1842 atomic_set(&rt->u.dst.__refcnt, 1);
1843
1844 return rt;
1845}
1846
1847static int fib6_ifdown(struct rt6_info *rt, void *arg)
1848{
1849 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1850 rt != &ip6_null_entry) {
1851 RT6_TRACE("deleted by ifdown %p\n", rt);
1852 return -1;
1853 }
1854 return 0;
1855}
1856
1857void rt6_ifdown(struct net_device *dev)
1858{
c71099ac 1859 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1860}
1861
/* Argument handed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1867
1868static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1869{
1870 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1871 struct inet6_dev *idev;
1872
1873 /* In IPv6 pmtu discovery is not optional,
1874 so that RTAX_MTU lock cannot disable it.
1875 We still use this lock to block changes
1876 caused by addrconf/ndisc.
1877 */
1878
1879 idev = __in6_dev_get(arg->dev);
1880 if (idev == NULL)
1881 return 0;
1882
1883 /* For administrative MTU increase, there is no way to discover
1884 IPv6 PMTU increase, so PMTU increase should be updated here.
1885 Since RFC 1981 doesn't include administrative MTU increase
1886 update PMTU increase is a MUST. (i.e. jumbo frame)
1887 */
1888 /*
1889 If new MTU is less than route PMTU, this new MTU will be the
1890 lowest MTU in the path, update the route PMTU to reflect PMTU
1891 decreases; if new MTU is greater than route PMTU, and the
1892 old MTU is the lowest MTU in the path, update the route PMTU
1893 to reflect the increase. In this case if the other nodes' MTU
1894 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1895 PMTU discouvery.
1896 */
1897 if (rt->rt6i_dev == arg->dev &&
1898 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1ab1457c
YH
1899 (dst_mtu(&rt->u.dst) > arg->mtu ||
1900 (dst_mtu(&rt->u.dst) < arg->mtu &&
1da177e4
LT
1901 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1902 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1903 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1904 return 0;
1905}
1906
1907void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1908{
c71099ac
TG
1909 struct rt6_mtu_change_arg arg = {
1910 .dev = dev,
1911 .mtu = mtu,
1912 };
1da177e4 1913
c71099ac 1914 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1915}
1916
86872cb5 1917static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
5176f91e 1918 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1919 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1920 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1921 [RTA_PRIORITY] = { .type = NLA_U32 },
1922 [RTA_METRICS] = { .type = NLA_NESTED },
1923};
1924
1925static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1926 struct fib6_config *cfg)
1da177e4 1927{
86872cb5
TG
1928 struct rtmsg *rtm;
1929 struct nlattr *tb[RTA_MAX+1];
1930 int err;
1da177e4 1931
86872cb5
TG
1932 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1933 if (err < 0)
1934 goto errout;
1da177e4 1935
86872cb5
TG
1936 err = -EINVAL;
1937 rtm = nlmsg_data(nlh);
1938 memset(cfg, 0, sizeof(*cfg));
1939
1940 cfg->fc_table = rtm->rtm_table;
1941 cfg->fc_dst_len = rtm->rtm_dst_len;
1942 cfg->fc_src_len = rtm->rtm_src_len;
1943 cfg->fc_flags = RTF_UP;
1944 cfg->fc_protocol = rtm->rtm_protocol;
1945
1946 if (rtm->rtm_type == RTN_UNREACHABLE)
1947 cfg->fc_flags |= RTF_REJECT;
1948
1949 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1950 cfg->fc_nlinfo.nlh = nlh;
1951
1952 if (tb[RTA_GATEWAY]) {
1953 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1954 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1955 }
86872cb5
TG
1956
1957 if (tb[RTA_DST]) {
1958 int plen = (rtm->rtm_dst_len + 7) >> 3;
1959
1960 if (nla_len(tb[RTA_DST]) < plen)
1961 goto errout;
1962
1963 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1964 }
86872cb5
TG
1965
1966 if (tb[RTA_SRC]) {
1967 int plen = (rtm->rtm_src_len + 7) >> 3;
1968
1969 if (nla_len(tb[RTA_SRC]) < plen)
1970 goto errout;
1971
1972 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1973 }
86872cb5
TG
1974
1975 if (tb[RTA_OIF])
1976 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1977
1978 if (tb[RTA_PRIORITY])
1979 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1980
1981 if (tb[RTA_METRICS]) {
1982 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1983 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1984 }
86872cb5
TG
1985
1986 if (tb[RTA_TABLE])
1987 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1988
1989 err = 0;
1990errout:
1991 return err;
1da177e4
LT
1992}
1993
1994int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1995{
86872cb5
TG
1996 struct fib6_config cfg;
1997 int err;
1da177e4 1998
86872cb5
TG
1999 err = rtm_to_fib6_config(skb, nlh, &cfg);
2000 if (err < 0)
2001 return err;
2002
2003 return ip6_route_del(&cfg);
1da177e4
LT
2004}
2005
2006int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2007{
86872cb5
TG
2008 struct fib6_config cfg;
2009 int err;
1da177e4 2010
86872cb5
TG
2011 err = rtm_to_fib6_config(skb, nlh, &cfg);
2012 if (err < 0)
2013 return err;
2014
2015 return ip6_route_add(&cfg);
1da177e4
LT
2016}
2017
339bf98f
TG
2018static inline size_t rt6_nlmsg_size(void)
2019{
2020 return NLMSG_ALIGN(sizeof(struct rtmsg))
2021 + nla_total_size(16) /* RTA_SRC */
2022 + nla_total_size(16) /* RTA_DST */
2023 + nla_total_size(16) /* RTA_GATEWAY */
2024 + nla_total_size(16) /* RTA_PREFSRC */
2025 + nla_total_size(4) /* RTA_TABLE */
2026 + nla_total_size(4) /* RTA_IIF */
2027 + nla_total_size(4) /* RTA_OIF */
2028 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2029 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2030 + nla_total_size(sizeof(struct rta_cacheinfo));
2031}
2032
1da177e4 2033static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2034 struct in6_addr *dst, struct in6_addr *src,
2035 int iif, int type, u32 pid, u32 seq,
2036 int prefix, unsigned int flags)
1da177e4
LT
2037{
2038 struct rtmsg *rtm;
2d7202bf 2039 struct nlmsghdr *nlh;
e3703b3d 2040 long expires;
9e762a4a 2041 u32 table;
1da177e4
LT
2042
2043 if (prefix) { /* user wants prefix routes only */
2044 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2045 /* success since this is not a prefix route */
2046 return 1;
2047 }
2048 }
2049
2d7202bf
TG
2050 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2051 if (nlh == NULL)
26932566 2052 return -EMSGSIZE;
2d7202bf
TG
2053
2054 rtm = nlmsg_data(nlh);
1da177e4
LT
2055 rtm->rtm_family = AF_INET6;
2056 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2057 rtm->rtm_src_len = rt->rt6i_src.plen;
2058 rtm->rtm_tos = 0;
c71099ac 2059 if (rt->rt6i_table)
9e762a4a 2060 table = rt->rt6i_table->tb6_id;
c71099ac 2061 else
9e762a4a
PM
2062 table = RT6_TABLE_UNSPEC;
2063 rtm->rtm_table = table;
2d7202bf 2064 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2065 if (rt->rt6i_flags&RTF_REJECT)
2066 rtm->rtm_type = RTN_UNREACHABLE;
2067 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2068 rtm->rtm_type = RTN_LOCAL;
2069 else
2070 rtm->rtm_type = RTN_UNICAST;
2071 rtm->rtm_flags = 0;
2072 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2073 rtm->rtm_protocol = rt->rt6i_protocol;
2074 if (rt->rt6i_flags&RTF_DYNAMIC)
2075 rtm->rtm_protocol = RTPROT_REDIRECT;
2076 else if (rt->rt6i_flags & RTF_ADDRCONF)
2077 rtm->rtm_protocol = RTPROT_KERNEL;
2078 else if (rt->rt6i_flags&RTF_DEFAULT)
2079 rtm->rtm_protocol = RTPROT_RA;
2080
2081 if (rt->rt6i_flags&RTF_CACHE)
2082 rtm->rtm_flags |= RTM_F_CLONED;
2083
2084 if (dst) {
2d7202bf 2085 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2086 rtm->rtm_dst_len = 128;
1da177e4 2087 } else if (rtm->rtm_dst_len)
2d7202bf 2088 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2089#ifdef CONFIG_IPV6_SUBTREES
2090 if (src) {
2d7202bf 2091 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2092 rtm->rtm_src_len = 128;
1da177e4 2093 } else if (rtm->rtm_src_len)
2d7202bf 2094 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2095#endif
2096 if (iif)
2d7202bf 2097 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2098 else if (dst) {
2099 struct in6_addr saddr_buf;
2100 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2101 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2102 }
2d7202bf 2103
1da177e4 2104 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2105 goto nla_put_failure;
2106
1da177e4 2107 if (rt->u.dst.neighbour)
2d7202bf
TG
2108 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2109
1da177e4 2110 if (rt->u.dst.dev)
2d7202bf
TG
2111 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2112
2113 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2114
2115 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2116 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2117 expires, rt->u.dst.error) < 0)
2118 goto nla_put_failure;
2d7202bf
TG
2119
2120 return nlmsg_end(skb, nlh);
2121
2122nla_put_failure:
26932566
PM
2123 nlmsg_cancel(skb, nlh);
2124 return -EMSGSIZE;
1da177e4
LT
2125}
2126
1b43af54 2127int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2128{
2129 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2130 int prefix;
2131
2d7202bf
TG
2132 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2133 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2134 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2135 } else
2136 prefix = 0;
2137
2138 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2139 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2140 prefix, NLM_F_MULTI);
1da177e4
LT
2141}
2142
1da177e4
LT
2143int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2144{
ab364a6f
TG
2145 struct nlattr *tb[RTA_MAX+1];
2146 struct rt6_info *rt;
1da177e4 2147 struct sk_buff *skb;
ab364a6f 2148 struct rtmsg *rtm;
1da177e4 2149 struct flowi fl;
ab364a6f 2150 int err, iif = 0;
1da177e4 2151
ab364a6f
TG
2152 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2153 if (err < 0)
2154 goto errout;
1da177e4 2155
ab364a6f 2156 err = -EINVAL;
1da177e4 2157 memset(&fl, 0, sizeof(fl));
1da177e4 2158
ab364a6f
TG
2159 if (tb[RTA_SRC]) {
2160 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2161 goto errout;
2162
2163 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2164 }
2165
2166 if (tb[RTA_DST]) {
2167 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2168 goto errout;
2169
2170 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2171 }
2172
2173 if (tb[RTA_IIF])
2174 iif = nla_get_u32(tb[RTA_IIF]);
2175
2176 if (tb[RTA_OIF])
2177 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2178
2179 if (iif) {
2180 struct net_device *dev;
2181 dev = __dev_get_by_index(iif);
2182 if (!dev) {
2183 err = -ENODEV;
ab364a6f 2184 goto errout;
1da177e4
LT
2185 }
2186 }
2187
ab364a6f
TG
2188 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2189 if (skb == NULL) {
2190 err = -ENOBUFS;
2191 goto errout;
2192 }
1da177e4 2193
ab364a6f
TG
2194 /* Reserve room for dummy headers, this skb can pass
2195 through good chunk of routing engine.
2196 */
2197 skb->mac.raw = skb->data;
2198 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2199
ab364a6f 2200 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2201 skb->dst = &rt->u.dst;
2202
ab364a6f 2203 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2204 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2205 nlh->nlmsg_seq, 0, 0);
1da177e4 2206 if (err < 0) {
ab364a6f
TG
2207 kfree_skb(skb);
2208 goto errout;
1da177e4
LT
2209 }
2210
2942e900 2211 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2212errout:
1da177e4 2213 return err;
1da177e4
LT
2214}
2215
86872cb5 2216void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2217{
2218 struct sk_buff *skb;
86872cb5
TG
2219 u32 pid = 0, seq = 0;
2220 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2221 int err = -ENOBUFS;
2222
86872cb5
TG
2223 if (info) {
2224 pid = info->pid;
2225 nlh = info->nlh;
2226 if (nlh)
2227 seq = nlh->nlmsg_seq;
2228 }
2229
339bf98f 2230 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2231 if (skb == NULL)
2232 goto errout;
2233
2234 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
26932566
PM
2235 if (err < 0) {
2236 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2237 WARN_ON(err == -EMSGSIZE);
2238 kfree_skb(skb);
2239 goto errout;
2240 }
21713ebc
TG
2241 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2242errout:
2243 if (err < 0)
2244 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2245}
2246
2247/*
2248 * /proc
2249 */
2250
2251#ifdef CONFIG_PROC_FS
2252
/*
 * Nominal size in bytes of one /proc record; used to translate the read
 * offset into a number of whole records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer records are formatted into */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes the caller asked for */
	int skip;	/* records skipped so far to reach the offset */
	int len;	/* bytes written into buffer so far */
};
2263
2264static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2265{
2266 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1da177e4
LT
2267
2268 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2269 arg->skip++;
2270 return 0;
2271 }
2272
2273 if (arg->len >= arg->length)
2274 return 0;
2275
33e93c96
YH
2276 arg->len += sprintf(arg->buffer + arg->len,
2277 NIP6_SEQFMT " %02x ",
2278 NIP6(rt->rt6i_dst.addr),
1da177e4
LT
2279 rt->rt6i_dst.plen);
2280
2281#ifdef CONFIG_IPV6_SUBTREES
33e93c96
YH
2282 arg->len += sprintf(arg->buffer + arg->len,
2283 NIP6_SEQFMT " %02x ",
2284 NIP6(rt->rt6i_src.addr),
1da177e4
LT
2285 rt->rt6i_src.plen);
2286#else
33e93c96
YH
2287 arg->len += sprintf(arg->buffer + arg->len,
2288 "00000000000000000000000000000000 00 ");
1da177e4
LT
2289#endif
2290
2291 if (rt->rt6i_nexthop) {
33e93c96
YH
2292 arg->len += sprintf(arg->buffer + arg->len,
2293 NIP6_SEQFMT,
2294 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2295 } else {
33e93c96
YH
2296 arg->len += sprintf(arg->buffer + arg->len,
2297 "00000000000000000000000000000000");
1da177e4
LT
2298 }
2299 arg->len += sprintf(arg->buffer + arg->len,
2300 " %08x %08x %08x %08x %8s\n",
2301 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1ab1457c 2302 rt->u.dst.__use, rt->rt6i_flags,
1da177e4
LT
2303 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2304 return 0;
2305}
2306
2307static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2308{
c71099ac
TG
2309 struct rt6_proc_arg arg = {
2310 .buffer = buffer,
2311 .offset = offset,
2312 .length = length,
2313 };
1da177e4 2314
c71099ac 2315 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2316
2317 *start = buffer;
2318 if (offset)
2319 *start += offset % RT6_INFO_LEN;
2320
2321 arg.len -= offset % RT6_INFO_LEN;
2322
2323 if (arg.len > length)
2324 arg.len = length;
2325 if (arg.len < 0)
2326 arg.len = 0;
2327
2328 return arg.len;
2329}
2330
1da177e4
LT
2331static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2332{
2333 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2334 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2335 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2336 rt6_stats.fib_rt_cache,
2337 atomic_read(&ip6_dst_ops.entries),
2338 rt6_stats.fib_discarded_routes);
2339
2340 return 0;
2341}
2342
2343static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2344{
2345 return single_open(file, rt6_stats_seq_show, NULL);
2346}
2347
9a32144e 2348static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2349 .owner = THIS_MODULE,
2350 .open = rt6_stats_seq_open,
2351 .read = seq_read,
2352 .llseek = seq_lseek,
2353 .release = single_release,
2354};
2355#endif /* CONFIG_PROC_FS */
2356
2357#ifdef CONFIG_SYSCTL
2358
2359static int flush_delay;
2360
2361static
2362int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2363 void __user *buffer, size_t *lenp, loff_t *ppos)
2364{
2365 if (write) {
2366 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2367 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2368 return 0;
2369 } else
2370 return -EINVAL;
2371}
2372
2373ctl_table ipv6_route_table[] = {
1ab1457c
YH
2374 {
2375 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1da177e4 2376 .procname = "flush",
1ab1457c 2377 .data = &flush_delay,
1da177e4 2378 .maxlen = sizeof(int),
89c8b3a1 2379 .mode = 0200,
1ab1457c 2380 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2381 },
2382 {
2383 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2384 .procname = "gc_thresh",
1ab1457c 2385 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2386 .maxlen = sizeof(int),
2387 .mode = 0644,
1ab1457c 2388 .proc_handler = &proc_dointvec,
1da177e4
LT
2389 },
2390 {
2391 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2392 .procname = "max_size",
1ab1457c 2393 .data = &ip6_rt_max_size,
1da177e4
LT
2394 .maxlen = sizeof(int),
2395 .mode = 0644,
1ab1457c 2396 .proc_handler = &proc_dointvec,
1da177e4
LT
2397 },
2398 {
2399 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2400 .procname = "gc_min_interval",
1ab1457c 2401 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2402 .maxlen = sizeof(int),
2403 .mode = 0644,
1ab1457c 2404 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2405 .strategy = &sysctl_jiffies,
2406 },
2407 {
2408 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2409 .procname = "gc_timeout",
1ab1457c 2410 .data = &ip6_rt_gc_timeout,
1da177e4
LT
2411 .maxlen = sizeof(int),
2412 .mode = 0644,
1ab1457c 2413 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2414 .strategy = &sysctl_jiffies,
2415 },
2416 {
2417 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2418 .procname = "gc_interval",
1ab1457c 2419 .data = &ip6_rt_gc_interval,
1da177e4
LT
2420 .maxlen = sizeof(int),
2421 .mode = 0644,
1ab1457c 2422 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2423 .strategy = &sysctl_jiffies,
2424 },
2425 {
2426 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2427 .procname = "gc_elasticity",
1ab1457c 2428 .data = &ip6_rt_gc_elasticity,
1da177e4
LT
2429 .maxlen = sizeof(int),
2430 .mode = 0644,
1ab1457c 2431 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2432 .strategy = &sysctl_jiffies,
2433 },
2434 {
2435 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2436 .procname = "mtu_expires",
1ab1457c 2437 .data = &ip6_rt_mtu_expires,
1da177e4
LT
2438 .maxlen = sizeof(int),
2439 .mode = 0644,
1ab1457c 2440 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2441 .strategy = &sysctl_jiffies,
2442 },
2443 {
2444 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2445 .procname = "min_adv_mss",
1ab1457c 2446 .data = &ip6_rt_min_advmss,
1da177e4
LT
2447 .maxlen = sizeof(int),
2448 .mode = 0644,
1ab1457c 2449 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2450 .strategy = &sysctl_jiffies,
2451 },
2452 {
2453 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2454 .procname = "gc_min_interval_ms",
1ab1457c 2455 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2456 .maxlen = sizeof(int),
2457 .mode = 0644,
1ab1457c 2458 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2459 .strategy = &sysctl_ms_jiffies,
2460 },
2461 { .ctl_name = 0 }
2462};
2463
2464#endif
2465
2466void __init ip6_route_init(void)
2467{
2468 struct proc_dir_entry *p;
2469
e5d679f3
AD
2470 ip6_dst_ops.kmem_cachep =
2471 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2472 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1da177e4
LT
2473 fib6_init();
2474#ifdef CONFIG_PROC_FS
2475 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2476 if (p)
2477 p->owner = THIS_MODULE;
2478
2479 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2480#endif
2481#ifdef CONFIG_XFRM
2482 xfrm6_init();
2483#endif
101367c2
TG
2484#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2485 fib6_rules_init();
2486#endif
1da177e4
LT
2487}
2488
2489void ip6_route_cleanup(void)
2490{
101367c2
TG
2491#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2492 fib6_rules_cleanup();
2493#endif
1da177e4
LT
2494#ifdef CONFIG_PROC_FS
2495 proc_net_remove("ipv6_route");
2496 proc_net_remove("rt6_stats");
2497#endif
2498#ifdef CONFIG_XFRM
2499 xfrm6_fini();
2500#endif
2501 rt6_ifdown(NULL);
2502 fib6_gc_cleanup();
2503 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2504}