]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NET_DMA]: remove unused dma_memcpy_to_kernel_iovec
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4
LT
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
8d71740c 57#include <net/netevent.h>
21713ebc 58#include <net/netlink.h>
1da177e4
LT
59
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
/*
 * Debug level for this file.  With a value of 3 or more RDBG() and
 * RT6_TRACE() expand to printk() calls; below that they compile to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* Non-zero compiles in the rt6_alloc_clone() branch of the policy lookups. */
#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
78
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
70ceb4f5
YH
100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
1da177e4
LT
108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
14e50e57
DM
122static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
123{
124}
125
126static struct dst_ops ip6_dst_blackhole_ops = {
127 .family = AF_INET6,
128 .protocol = __constant_htons(ETH_P_IPV6),
129 .destroy = ip6_dst_destroy,
130 .check = ip6_dst_check,
131 .update_pmtu = ip6_rt_blackhole_update_pmtu,
132 .entry_size = sizeof(struct rt6_info),
133};
134
1da177e4
LT
135struct rt6_info ip6_null_entry = {
136 .u = {
137 .dst = {
138 .__refcnt = ATOMIC_INIT(1),
139 .__use = 1,
140 .dev = &loopback_dev,
141 .obsolete = -1,
142 .error = -ENETUNREACH,
143 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
144 .input = ip6_pkt_discard,
145 .output = ip6_pkt_discard_out,
146 .ops = &ip6_dst_ops,
147 .path = (struct dst_entry*)&ip6_null_entry,
148 }
149 },
150 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
151 .rt6i_metric = ~(u32) 0,
152 .rt6i_ref = ATOMIC_INIT(1),
153};
154
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Static reject route whose dst reports -EACCES and whose input/output
 * handlers are the "prohibit" variants.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Static reject route whose dst reports -EINVAL; the same handler serves
 * both the input and the output direction.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
202
1da177e4
LT
203/* allocate dst with ip6_dst_ops */
204static __inline__ struct rt6_info *ip6_dst_alloc(void)
205{
206 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
207}
208
209static void ip6_dst_destroy(struct dst_entry *dst)
210{
211 struct rt6_info *rt = (struct rt6_info *)dst;
212 struct inet6_dev *idev = rt->rt6i_idev;
213
214 if (idev != NULL) {
215 rt->rt6i_idev = NULL;
216 in6_dev_put(idev);
1ab1457c 217 }
1da177e4
LT
218}
219
220static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
221 int how)
222{
223 struct rt6_info *rt = (struct rt6_info *)dst;
224 struct inet6_dev *idev = rt->rt6i_idev;
225
226 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
227 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
228 if (loopback_idev != NULL) {
229 rt->rt6i_idev = loopback_idev;
230 in6_dev_put(idev);
231 }
232 }
233}
234
235static __inline__ int rt6_check_expired(const struct rt6_info *rt)
236{
237 return (rt->rt6i_flags & RTF_EXPIRES &&
238 time_after(jiffies, rt->rt6i_expires));
239}
240
c71099ac
TG
241static inline int rt6_need_strict(struct in6_addr *daddr)
242{
243 return (ipv6_addr_type(daddr) &
244 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
245}
246
1da177e4 247/*
c71099ac 248 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
249 */
250
251static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
252 int oif,
253 int strict)
254{
255 struct rt6_info *local = NULL;
256 struct rt6_info *sprt;
257
258 if (oif) {
7cc48263 259 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
260 struct net_device *dev = sprt->rt6i_dev;
261 if (dev->ifindex == oif)
262 return sprt;
263 if (dev->flags & IFF_LOOPBACK) {
264 if (sprt->rt6i_idev == NULL ||
265 sprt->rt6i_idev->dev->ifindex != oif) {
266 if (strict && oif)
267 continue;
1ab1457c 268 if (local && (!oif ||
1da177e4
LT
269 local->rt6i_idev->dev->ifindex == oif))
270 continue;
271 }
272 local = sprt;
273 }
274 }
275
276 if (local)
277 return local;
278
279 if (strict)
280 return &ip6_null_entry;
281 }
282 return rt;
283}
284
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and was last updated more than rtr_probe_interval ago,
 * send a Neighbour Solicitation to its solicited-node multicast address.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	/*
	 * The rate-limit test and the store to neigh->updated must be one
	 * atomic step, and the store modifies the neighbour, so the writer
	 * side of the lock is required.  Holding only the reader side lets
	 * two CPUs pass the test together and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* drop the lock before transmitting */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
320
1da177e4 321/*
554cfb7e 322 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 323 */
b6f99a21 324static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
325{
326 struct net_device *dev = rt->rt6i_dev;
161980f4 327 if (!oif || dev->ifindex == oif)
554cfb7e 328 return 2;
161980f4
DM
329 if ((dev->flags & IFF_LOOPBACK) &&
330 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 return 1;
332 return 0;
554cfb7e 333}
1da177e4 334
b6f99a21 335static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 336{
554cfb7e
YH
337 struct neighbour *neigh = rt->rt6i_nexthop;
338 int m = 0;
4d0c5911
YH
339 if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 !(rt->rt6i_flags & RTF_GATEWAY))
341 m = 1;
342 else if (neigh) {
554cfb7e
YH
343 read_lock_bh(&neigh->lock);
344 if (neigh->nud_state & NUD_VALID)
4d0c5911 345 m = 2;
ea73ee23
YH
346 else if (!(neigh->nud_state & NUD_FAILED))
347 m = 1;
554cfb7e 348 read_unlock_bh(&neigh->lock);
1da177e4 349 }
554cfb7e 350 return m;
1da177e4
LT
351}
352
554cfb7e
YH
353static int rt6_score_route(struct rt6_info *rt, int oif,
354 int strict)
1da177e4 355{
4d0c5911 356 int m, n;
1ab1457c 357
4d0c5911 358 m = rt6_check_dev(rt, oif);
77d16f45 359 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 360 return -1;
ebacaaa0
YH
361#ifdef CONFIG_IPV6_ROUTER_PREF
362 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
363#endif
4d0c5911 364 n = rt6_check_neigh(rt);
557e92ef 365 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
366 return -1;
367 return m;
368}
369
f11e6659
DM
370static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
371 int *mpri, struct rt6_info *match)
554cfb7e 372{
f11e6659
DM
373 int m;
374
375 if (rt6_check_expired(rt))
376 goto out;
377
378 m = rt6_score_route(rt, oif, strict);
379 if (m < 0)
380 goto out;
381
382 if (m > *mpri) {
383 if (strict & RT6_LOOKUP_F_REACHABLE)
384 rt6_probe(match);
385 *mpri = m;
386 match = rt;
387 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
388 rt6_probe(rt);
389 }
390
391out:
392 return match;
393}
394
395static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
396 struct rt6_info *rr_head,
397 u32 metric, int oif, int strict)
398{
399 struct rt6_info *rt, *match;
554cfb7e 400 int mpri = -1;
1da177e4 401
f11e6659
DM
402 match = NULL;
403 for (rt = rr_head; rt && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
406 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
407 rt = rt->u.dst.rt6_next)
408 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 409
f11e6659
DM
410 return match;
411}
1da177e4 412
f11e6659
DM
413static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
414{
415 struct rt6_info *match, *rt0;
1da177e4 416
f11e6659
DM
417 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
418 __FUNCTION__, fn->leaf, oif);
554cfb7e 419
f11e6659
DM
420 rt0 = fn->rr_ptr;
421 if (!rt0)
422 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 423
f11e6659 424 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 425
554cfb7e 426 if (!match &&
f11e6659
DM
427 (strict & RT6_LOOKUP_F_REACHABLE)) {
428 struct rt6_info *next = rt0->u.dst.rt6_next;
429
554cfb7e 430 /* no entries matched; do round-robin */
f11e6659
DM
431 if (!next || next->rt6i_metric != rt0->rt6i_metric)
432 next = fn->leaf;
433
434 if (next != rt0)
435 fn->rr_ptr = next;
1da177e4 436 }
1da177e4 437
f11e6659
DM
438 RT6_TRACE("%s() => %p\n",
439 __FUNCTION__, match);
1da177e4 440
554cfb7e 441 return (match ? match : &ip6_null_entry);
1da177e4
LT
442}
443
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option (RFC 4191).
 *
 * @dev:    device the option arrived on; its ifindex keys the route
 * @opt:    start of the option, laid out as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address to use for the route
 *
 * An existing route is deleted when the lifetime is zero, a missing one is
 * added when the lifetime is non-zero, and an existing one has its
 * preference refreshed.  The expiry is then set from the lifetime
 * (0xffffffff means "never expires").
 *
 * Returns 0 on success or -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* compare as signed so that a negative length is rejected too */
	if (len < (int)sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length.
	 *
	 * rinfo->length counts 8-octet units including the 8-byte header,
	 * so 1, 2 and 3 carry 0, 8 and 16 prefix bytes.  RFC 4191 2.3
	 * requires length 3 for prefix_len > 64 and length >= 2 for
	 * prefix_len > 0; accepting less would make the prefix copy below
	 * read beyond the option.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 3) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		/* full 16-byte prefix present: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* shorter prefix: build a zero-padded copy of prefix_len bits */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		/* zero lifetime withdraws the route */
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
521
982f56f3
YH
/*
 * Walk back up the tree after a failed selection.
 *
 * Expects the locals "rt" and "fn" and the labels "out" and "restart" in
 * the enclosing function.  Does nothing unless rt is &ip6_null_entry.
 * Otherwise it climbs from fn towards the root: at each step it moves to
 * the parent, or into the parent's source subtree when one exists and is
 * not where we came from.  It jumps to "restart" at the first node with
 * RTN_RTINFO set and to "out" once a node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
539
540static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
541 struct flowi *fl, int flags)
1da177e4
LT
542{
543 struct fib6_node *fn;
544 struct rt6_info *rt;
545
c71099ac
TG
546 read_lock_bh(&table->tb6_lock);
547 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
548restart:
549 rt = fn->leaf;
77d16f45 550 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 551 BACKTRACK(&fl->fl6_src);
c71099ac 552out:
33cc4896 553 dst_hold(&rt->u.dst);
c71099ac 554 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
555
556 rt->u.dst.lastuse = jiffies;
c71099ac
TG
557 rt->u.dst.__use++;
558
559 return rt;
560
561}
562
563struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
564 int oif, int strict)
565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
c71099ac
TG
571 },
572 },
573 };
574 struct dst_entry *dst;
77d16f45 575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 576
adaa70bb
TG
577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
c71099ac
TG
582 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
1da177e4
LT
588 return NULL;
589}
590
7159039a
YH
591EXPORT_SYMBOL(rt6_lookup);
592
/*
 * ip6_ins_rt must be called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed.  In any case, if the caller does not hold a
 * reference to the route, it may be destroyed.
 */
598
86872cb5 599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
600{
601 int err;
c71099ac 602 struct fib6_table *table;
1da177e4 603
c71099ac
TG
604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
86872cb5 606 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 607 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
608
609 return err;
610}
611
40e22e8f
TG
612int ip6_ins_rt(struct rt6_info *rt)
613{
86872cb5 614 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
615}
616
95a9a5ba
YH
617static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 struct in6_addr *saddr)
1da177e4 619{
1da177e4
LT
620 struct rt6_info *rt;
621
622 /*
623 * Clone the route.
624 */
625
626 rt = ip6_rt_copy(ort);
627
628 if (rt) {
58c4fb86
YH
629 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 if (rt->rt6i_dst.plen != 128 &&
631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 633 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 634 }
1da177e4 635
58c4fb86 636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
637 rt->rt6i_dst.plen = 128;
638 rt->rt6i_flags |= RTF_CACHE;
639 rt->u.dst.flags |= DST_HOST;
640
641#ifdef CONFIG_IPV6_SUBTREES
642 if (rt->rt6i_src.plen && saddr) {
643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 rt->rt6i_src.plen = 128;
645 }
646#endif
647
648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
95a9a5ba 650 }
1da177e4 651
95a9a5ba
YH
652 return rt;
653}
1da177e4 654
299d9939
YH
655static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656{
657 struct rt6_info *rt = ip6_rt_copy(ort);
658 if (rt) {
659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 rt->rt6i_dst.plen = 128;
661 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
662 rt->u.dst.flags |= DST_HOST;
663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664 }
665 return rt;
666}
667
8ce11e6a
AB
668static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
669 struct flowi *fl, int flags)
1da177e4
LT
670{
671 struct fib6_node *fn;
519fbd87 672 struct rt6_info *rt, *nrt;
c71099ac 673 int strict = 0;
1da177e4 674 int attempts = 3;
519fbd87 675 int err;
ea659e07 676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 677
77d16f45 678 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
679
680relookup:
c71099ac 681 read_lock_bh(&table->tb6_lock);
1da177e4 682
8238dd06 683restart_2:
c71099ac 684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
685
686restart:
f11e6659 687 rt = rt6_select(fn, fl->iif, strict | reachable);
982f56f3 688 BACKTRACK(&fl->fl6_src);
8238dd06
YH
689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
1ddef044 691 goto out;
1da177e4 692
fb9de91e 693 dst_hold(&rt->u.dst);
c71099ac 694 read_unlock_bh(&table->tb6_lock);
fb9de91e 695
519fbd87 696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
698 else {
699#if CLONE_OFFLINK_ROUTE
c71099ac 700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
701#else
702 goto out2;
703#endif
704 }
e40cf353 705
519fbd87
YH
706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
1da177e4 708
519fbd87
YH
709 dst_hold(&rt->u.dst);
710 if (nrt) {
40e22e8f 711 err = ip6_ins_rt(nrt);
519fbd87 712 if (!err)
1da177e4 713 goto out2;
1da177e4 714 }
1da177e4 715
519fbd87
YH
716 if (--attempts <= 0)
717 goto out2;
718
719 /*
c71099ac 720 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
721 * released someone could insert this route. Relookup.
722 */
723 dst_release(&rt->u.dst);
724 goto relookup;
725
726out:
8238dd06
YH
727 if (reachable) {
728 reachable = 0;
729 goto restart_2;
730 }
519fbd87 731 dst_hold(&rt->u.dst);
c71099ac 732 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
733out2:
734 rt->u.dst.lastuse = jiffies;
735 rt->u.dst.__use++;
c71099ac
TG
736
737 return rt;
1da177e4
LT
738}
739
c71099ac
TG
740void ip6_route_input(struct sk_buff *skb)
741{
0660e03f 742 struct ipv6hdr *iph = ipv6_hdr(skb);
adaa70bb 743 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
744 struct flowi fl = {
745 .iif = skb->dev->ifindex,
746 .nl_u = {
747 .ip6_u = {
748 .daddr = iph->daddr,
749 .saddr = iph->saddr,
90bcaf7b 750 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
751 },
752 },
1ab1457c 753 .mark = skb->mark,
c71099ac
TG
754 .proto = iph->nexthdr,
755 };
adaa70bb
TG
756
757 if (rt6_need_strict(&iph->daddr))
758 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
759
760 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
761}
762
763static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
764 struct flowi *fl, int flags)
1da177e4
LT
765{
766 struct fib6_node *fn;
519fbd87 767 struct rt6_info *rt, *nrt;
c71099ac 768 int strict = 0;
1da177e4 769 int attempts = 3;
519fbd87 770 int err;
ea659e07 771 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 772
77d16f45 773 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
774
775relookup:
c71099ac 776 read_lock_bh(&table->tb6_lock);
1da177e4 777
8238dd06 778restart_2:
c71099ac 779 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
780
781restart:
f11e6659 782 rt = rt6_select(fn, fl->oif, strict | reachable);
982f56f3 783 BACKTRACK(&fl->fl6_src);
8238dd06
YH
784 if (rt == &ip6_null_entry ||
785 rt->rt6i_flags & RTF_CACHE)
1da177e4 786 goto out;
1da177e4 787
fb9de91e 788 dst_hold(&rt->u.dst);
c71099ac 789 read_unlock_bh(&table->tb6_lock);
fb9de91e 790
519fbd87 791 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 792 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
793 else {
794#if CLONE_OFFLINK_ROUTE
795 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
796#else
797 goto out2;
798#endif
799 }
1da177e4 800
519fbd87
YH
801 dst_release(&rt->u.dst);
802 rt = nrt ? : &ip6_null_entry;
1da177e4 803
519fbd87
YH
804 dst_hold(&rt->u.dst);
805 if (nrt) {
40e22e8f 806 err = ip6_ins_rt(nrt);
519fbd87 807 if (!err)
1da177e4 808 goto out2;
1da177e4 809 }
e40cf353 810
519fbd87
YH
811 if (--attempts <= 0)
812 goto out2;
813
814 /*
c71099ac 815 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
816 * released someone could insert this route. Relookup.
817 */
818 dst_release(&rt->u.dst);
819 goto relookup;
820
821out:
8238dd06
YH
822 if (reachable) {
823 reachable = 0;
824 goto restart_2;
825 }
519fbd87 826 dst_hold(&rt->u.dst);
c71099ac 827 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
828out2:
829 rt->u.dst.lastuse = jiffies;
830 rt->u.dst.__use++;
c71099ac
TG
831 return rt;
832}
833
834struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
835{
836 int flags = 0;
837
838 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 839 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 840
adaa70bb
TG
841 if (!ipv6_addr_any(&fl->fl6_src))
842 flags |= RT6_LOOKUP_F_HAS_SADDR;
843
c71099ac 844 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
845}
846
7159039a 847EXPORT_SYMBOL(ip6_route_output);
1da177e4 848
14e50e57
DM
/* input/output handler of blackhole dsts: free the packet, report success. */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	const int rc = 0;

	kfree_skb(skb);
	return rc;
}
854
855int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
856{
857 struct rt6_info *ort = (struct rt6_info *) *dstp;
858 struct rt6_info *rt = (struct rt6_info *)
859 dst_alloc(&ip6_dst_blackhole_ops);
860 struct dst_entry *new = NULL;
861
862 if (rt) {
863 new = &rt->u.dst;
864
865 atomic_set(&new->__refcnt, 1);
866 new->__use = 1;
867 new->input = ip6_blackhole_output;
868 new->output = ip6_blackhole_output;
869
870 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
871 new->dev = ort->u.dst.dev;
872 if (new->dev)
873 dev_hold(new->dev);
874 rt->rt6i_idev = ort->rt6i_idev;
875 if (rt->rt6i_idev)
876 in6_dev_hold(rt->rt6i_idev);
877 rt->rt6i_expires = 0;
878
879 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
880 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
881 rt->rt6i_metric = 0;
882
883 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
884#ifdef CONFIG_IPV6_SUBTREES
885 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
886#endif
887
888 dst_free(new);
889 }
890
891 dst_release(*dstp);
892 *dstp = new;
893 return (new ? 0 : -ENOMEM);
894}
895EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
896
1da177e4
LT
897/*
898 * Destination cache support functions
899 */
900
901static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
902{
903 struct rt6_info *rt;
904
905 rt = (struct rt6_info *) dst;
906
907 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
908 return dst;
909
910 return NULL;
911}
912
913static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
914{
915 struct rt6_info *rt = (struct rt6_info *) dst;
916
917 if (rt) {
918 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 919 ip6_del_rt(rt);
1da177e4
LT
920 else
921 dst_release(dst);
922 }
923 return NULL;
924}
925
926static void ip6_link_failure(struct sk_buff *skb)
927{
928 struct rt6_info *rt;
929
930 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
931
932 rt = (struct rt6_info *) skb->dst;
933 if (rt) {
934 if (rt->rt6i_flags&RTF_CACHE) {
935 dst_set_expires(&rt->u.dst, 0);
936 rt->rt6i_flags |= RTF_EXPIRES;
937 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
938 rt->rt6i_node->fn_sernum = -1;
939 }
940}
941
942static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
943{
944 struct rt6_info *rt6 = (struct rt6_info*)dst;
945
946 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
947 rt6->rt6i_flags |= RTF_MODIFIED;
948 if (mtu < IPV6_MIN_MTU) {
949 mtu = IPV6_MIN_MTU;
950 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
951 }
952 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 953 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
954 }
955}
956
1da177e4
LT
957static int ipv6_get_mtu(struct net_device *dev);
958
959static inline unsigned int ipv6_advmss(unsigned int mtu)
960{
961 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
962
963 if (mtu < ip6_rt_min_advmss)
964 mtu = ip6_rt_min_advmss;
965
966 /*
1ab1457c
YH
967 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
968 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
969 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
970 * rely only on pmtu discovery"
971 */
972 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
973 mtu = IPV6_MAXPLEN;
974 return mtu;
975}
976
5d0bbeeb 977static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 978static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 979
1ab1457c 980struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
1da177e4
LT
981 struct neighbour *neigh,
982 struct in6_addr *addr,
983 int (*output)(struct sk_buff *))
984{
985 struct rt6_info *rt;
986 struct inet6_dev *idev = in6_dev_get(dev);
987
988 if (unlikely(idev == NULL))
989 return NULL;
990
991 rt = ip6_dst_alloc();
992 if (unlikely(rt == NULL)) {
993 in6_dev_put(idev);
994 goto out;
995 }
996
997 dev_hold(dev);
998 if (neigh)
999 neigh_hold(neigh);
1000 else
1001 neigh = ndisc_get_neigh(dev, addr);
1002
1003 rt->rt6i_dev = dev;
1004 rt->rt6i_idev = idev;
1005 rt->rt6i_nexthop = neigh;
1006 atomic_set(&rt->u.dst.__refcnt, 1);
1007 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1008 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1009 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1010 rt->u.dst.output = output;
1011
1012#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
1013 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1014 ? DST_HOST
1da177e4
LT
1015 : 0;
1016 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1017 rt->rt6i_dst.plen = 128;
1018#endif
1019
5d0bbeeb 1020 spin_lock_bh(&ndisc_lock);
1da177e4
LT
1021 rt->u.dst.next = ndisc_dst_gc_list;
1022 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 1023 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
1024
1025 fib6_force_start_gc();
1026
1027out:
40aa7b90 1028 return &rt->u.dst;
1da177e4
LT
1029}
1030
1031int ndisc_dst_gc(int *more)
1032{
1033 struct dst_entry *dst, *next, **pprev;
1034 int freed;
1035
1036 next = NULL;
1ab1457c 1037 freed = 0;
5d0bbeeb
TG
1038
1039 spin_lock_bh(&ndisc_lock);
1da177e4 1040 pprev = &ndisc_dst_gc_list;
5d0bbeeb 1041
1da177e4
LT
1042 while ((dst = *pprev) != NULL) {
1043 if (!atomic_read(&dst->__refcnt)) {
1044 *pprev = dst->next;
1045 dst_free(dst);
1046 freed++;
1047 } else {
1048 pprev = &dst->next;
1049 (*more)++;
1050 }
1051 }
1052
5d0bbeeb
TG
1053 spin_unlock_bh(&ndisc_lock);
1054
1da177e4
LT
1055 return freed;
1056}
1057
1058static int ip6_dst_gc(void)
1059{
1060 static unsigned expire = 30*HZ;
1061 static unsigned long last_gc;
1062 unsigned long now = jiffies;
1063
1064 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1065 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1066 goto out;
1067
1068 expire++;
1069 fib6_run_gc(expire);
1070 last_gc = now;
1071 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1072 expire = ip6_rt_gc_timeout>>1;
1073
1074out:
1075 expire -= expire>>ip6_rt_gc_elasticity;
1076 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1077}
1078
1079/* Clean host part of a prefix. Not necessary in radix tree,
1080 but results in cleaner routing tables.
1081
1082 Remove it only when all the things will work!
1083 */
1084
1085static int ipv6_get_mtu(struct net_device *dev)
1086{
1087 int mtu = IPV6_MIN_MTU;
1088 struct inet6_dev *idev;
1089
1090 idev = in6_dev_get(dev);
1091 if (idev) {
1092 mtu = idev->cnf.mtu6;
1093 in6_dev_put(idev);
1094 }
1095 return mtu;
1096}
1097
1098int ipv6_get_hoplimit(struct net_device *dev)
1099{
1100 int hoplimit = ipv6_devconf.hop_limit;
1101 struct inet6_dev *idev;
1102
1103 idev = in6_dev_get(dev);
1104 if (idev) {
1105 hoplimit = idev->cnf.hop_limit;
1106 in6_dev_put(idev);
1107 }
1108 return hoplimit;
1109}
1110
1111/*
1112 *
1113 */
1114
86872cb5 1115int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1116{
1117 int err;
1da177e4
LT
1118 struct rt6_info *rt = NULL;
1119 struct net_device *dev = NULL;
1120 struct inet6_dev *idev = NULL;
c71099ac 1121 struct fib6_table *table;
1da177e4
LT
1122 int addr_type;
1123
86872cb5 1124 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1125 return -EINVAL;
1126#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1127 if (cfg->fc_src_len)
1da177e4
LT
1128 return -EINVAL;
1129#endif
86872cb5 1130 if (cfg->fc_ifindex) {
1da177e4 1131 err = -ENODEV;
86872cb5 1132 dev = dev_get_by_index(cfg->fc_ifindex);
1da177e4
LT
1133 if (!dev)
1134 goto out;
1135 idev = in6_dev_get(dev);
1136 if (!idev)
1137 goto out;
1138 }
1139
86872cb5
TG
1140 if (cfg->fc_metric == 0)
1141 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1142
86872cb5 1143 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1144 if (table == NULL) {
1145 err = -ENOBUFS;
1146 goto out;
1147 }
1148
1da177e4
LT
1149 rt = ip6_dst_alloc();
1150
1151 if (rt == NULL) {
1152 err = -ENOMEM;
1153 goto out;
1154 }
1155
1156 rt->u.dst.obsolete = -1;
86872cb5 1157 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1158
86872cb5
TG
1159 if (cfg->fc_protocol == RTPROT_UNSPEC)
1160 cfg->fc_protocol = RTPROT_BOOT;
1161 rt->rt6i_protocol = cfg->fc_protocol;
1162
1163 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1164
1165 if (addr_type & IPV6_ADDR_MULTICAST)
1166 rt->u.dst.input = ip6_mc_input;
1167 else
1168 rt->u.dst.input = ip6_forward;
1169
1170 rt->u.dst.output = ip6_output;
1171
86872cb5
TG
1172 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1173 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1174 if (rt->rt6i_dst.plen == 128)
1175 rt->u.dst.flags = DST_HOST;
1176
1177#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1178 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1179 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1180#endif
1181
86872cb5 1182 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1183
1184 /* We cannot add true routes via loopback here,
1185 they would result in kernel looping; promote them to reject routes
1186 */
86872cb5 1187 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1188 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1189 /* hold loopback dev/idev if we haven't done so. */
1190 if (dev != &loopback_dev) {
1191 if (dev) {
1192 dev_put(dev);
1193 in6_dev_put(idev);
1194 }
1195 dev = &loopback_dev;
1196 dev_hold(dev);
1197 idev = in6_dev_get(dev);
1198 if (!idev) {
1199 err = -ENODEV;
1200 goto out;
1201 }
1202 }
1203 rt->u.dst.output = ip6_pkt_discard_out;
1204 rt->u.dst.input = ip6_pkt_discard;
1205 rt->u.dst.error = -ENETUNREACH;
1206 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1207 goto install_route;
1208 }
1209
86872cb5 1210 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1211 struct in6_addr *gw_addr;
1212 int gwa_type;
1213
86872cb5
TG
1214 gw_addr = &cfg->fc_gateway;
1215 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1216 gwa_type = ipv6_addr_type(gw_addr);
1217
1218 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1219 struct rt6_info *grt;
1220
1221 /* IPv6 strictly inhibits using not link-local
1222 addresses as nexthop address.
1223 Otherwise, router will not able to send redirects.
1224 It is very good, but in some (rare!) circumstances
1225 (SIT, PtP, NBMA NOARP links) it is handy to allow
1226 some exceptions. --ANK
1227 */
1228 err = -EINVAL;
1229 if (!(gwa_type&IPV6_ADDR_UNICAST))
1230 goto out;
1231
86872cb5 1232 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1233
1234 err = -EHOSTUNREACH;
1235 if (grt == NULL)
1236 goto out;
1237 if (dev) {
1238 if (dev != grt->rt6i_dev) {
1239 dst_release(&grt->u.dst);
1240 goto out;
1241 }
1242 } else {
1243 dev = grt->rt6i_dev;
1244 idev = grt->rt6i_idev;
1245 dev_hold(dev);
1246 in6_dev_hold(grt->rt6i_idev);
1247 }
1248 if (!(grt->rt6i_flags&RTF_GATEWAY))
1249 err = 0;
1250 dst_release(&grt->u.dst);
1251
1252 if (err)
1253 goto out;
1254 }
1255 err = -EINVAL;
1256 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1257 goto out;
1258 }
1259
1260 err = -ENODEV;
1261 if (dev == NULL)
1262 goto out;
1263
86872cb5 1264 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1265 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1266 if (IS_ERR(rt->rt6i_nexthop)) {
1267 err = PTR_ERR(rt->rt6i_nexthop);
1268 rt->rt6i_nexthop = NULL;
1269 goto out;
1270 }
1271 }
1272
86872cb5 1273 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1274
1275install_route:
86872cb5
TG
1276 if (cfg->fc_mx) {
1277 struct nlattr *nla;
1278 int remaining;
1279
1280 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1281 int type = nla->nla_type;
1282
1283 if (type) {
1284 if (type > RTAX_MAX) {
1da177e4
LT
1285 err = -EINVAL;
1286 goto out;
1287 }
86872cb5
TG
1288
1289 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1290 }
1da177e4
LT
1291 }
1292 }
1293
1294 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1295 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1296 if (!rt->u.dst.metrics[RTAX_MTU-1])
1297 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1298 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1299 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1300 rt->u.dst.dev = dev;
1301 rt->rt6i_idev = idev;
c71099ac 1302 rt->rt6i_table = table;
86872cb5 1303 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1304
1305out:
1306 if (dev)
1307 dev_put(dev);
1308 if (idev)
1309 in6_dev_put(idev);
1310 if (rt)
40aa7b90 1311 dst_free(&rt->u.dst);
1da177e4
LT
1312 return err;
1313}
1314
86872cb5 1315static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1316{
1317 int err;
c71099ac 1318 struct fib6_table *table;
1da177e4 1319
6c813a72
PM
1320 if (rt == &ip6_null_entry)
1321 return -ENOENT;
1322
c71099ac
TG
1323 table = rt->rt6i_table;
1324 write_lock_bh(&table->tb6_lock);
1da177e4 1325
86872cb5 1326 err = fib6_del(rt, info);
1da177e4
LT
1327 dst_release(&rt->u.dst);
1328
c71099ac 1329 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1330
1331 return err;
1332}
1333
e0a1ad73
TG
1334int ip6_del_rt(struct rt6_info *rt)
1335{
86872cb5 1336 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1337}
1338
86872cb5 1339static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1340{
c71099ac 1341 struct fib6_table *table;
1da177e4
LT
1342 struct fib6_node *fn;
1343 struct rt6_info *rt;
1344 int err = -ESRCH;
1345
86872cb5 1346 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1347 if (table == NULL)
1348 return err;
1349
1350 read_lock_bh(&table->tb6_lock);
1da177e4 1351
c71099ac 1352 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1353 &cfg->fc_dst, cfg->fc_dst_len,
1354 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1355
1da177e4 1356 if (fn) {
7cc48263 1357 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1358 if (cfg->fc_ifindex &&
1da177e4 1359 (rt->rt6i_dev == NULL ||
86872cb5 1360 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1361 continue;
86872cb5
TG
1362 if (cfg->fc_flags & RTF_GATEWAY &&
1363 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1364 continue;
86872cb5 1365 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1366 continue;
1367 dst_hold(&rt->u.dst);
c71099ac 1368 read_unlock_bh(&table->tb6_lock);
1da177e4 1369
86872cb5 1370 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1371 }
1372 }
c71099ac 1373 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1374
1375 return err;
1376}
1377
1378/*
1379 * Handle redirects
1380 */
a6279458
YH
1381struct ip6rd_flowi {
1382 struct flowi fl;
1383 struct in6_addr gateway;
1384};
1385
1386static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1387 struct flowi *fl,
1388 int flags)
1da177e4 1389{
a6279458
YH
1390 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1391 struct rt6_info *rt;
e843b9e1 1392 struct fib6_node *fn;
c71099ac 1393
1da177e4 1394 /*
e843b9e1
YH
1395 * Get the "current" route for this destination and
1396 * check if the redirect has come from approriate router.
1397 *
1398 * RFC 2461 specifies that redirects should only be
1399 * accepted if they come from the nexthop to the target.
1400 * Due to the way the routes are chosen, this notion
1401 * is a bit fuzzy and one might need to check all possible
1402 * routes.
1da177e4 1403 */
1da177e4 1404
c71099ac 1405 read_lock_bh(&table->tb6_lock);
a6279458 1406 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1407restart:
7cc48263 1408 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1409 /*
1410 * Current route is on-link; redirect is always invalid.
1411 *
1412 * Seems, previous statement is not true. It could
1413 * be node, which looks for us as on-link (f.e. proxy ndisc)
1414 * But then router serving it might decide, that we should
1415 * know truth 8)8) --ANK (980726).
1416 */
1417 if (rt6_check_expired(rt))
1418 continue;
1419 if (!(rt->rt6i_flags & RTF_GATEWAY))
1420 continue;
a6279458 1421 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1422 continue;
a6279458 1423 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1424 continue;
1425 break;
1426 }
a6279458 1427
cb15d9c2 1428 if (!rt)
a6279458 1429 rt = &ip6_null_entry;
cb15d9c2
YH
1430 BACKTRACK(&fl->fl6_src);
1431out:
a6279458
YH
1432 dst_hold(&rt->u.dst);
1433
c71099ac 1434 read_unlock_bh(&table->tb6_lock);
e843b9e1 1435
a6279458
YH
1436 return rt;
1437};
1438
1439static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1440 struct in6_addr *src,
1441 struct in6_addr *gateway,
1442 struct net_device *dev)
1443{
adaa70bb 1444 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1445 struct ip6rd_flowi rdfl = {
1446 .fl = {
1447 .oif = dev->ifindex,
1448 .nl_u = {
1449 .ip6_u = {
1450 .daddr = *dest,
1451 .saddr = *src,
1452 },
1453 },
1454 },
1455 .gateway = *gateway,
1456 };
adaa70bb
TG
1457
1458 if (rt6_need_strict(dest))
1459 flags |= RT6_LOOKUP_F_IFACE;
a6279458
YH
1460
1461 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1462}
1463
1464void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1465 struct in6_addr *saddr,
1466 struct neighbour *neigh, u8 *lladdr, int on_link)
1467{
1468 struct rt6_info *rt, *nrt = NULL;
1469 struct netevent_redirect netevent;
1470
1471 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1472
1473 if (rt == &ip6_null_entry) {
1da177e4
LT
1474 if (net_ratelimit())
1475 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1476 "for redirect target\n");
a6279458 1477 goto out;
1da177e4
LT
1478 }
1479
1da177e4
LT
1480 /*
1481 * We have finally decided to accept it.
1482 */
1483
1ab1457c 1484 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1485 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1486 NEIGH_UPDATE_F_OVERRIDE|
1487 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1488 NEIGH_UPDATE_F_ISROUTER))
1489 );
1490
1491 /*
1492 * Redirect received -> path was valid.
1493 * Look, redirects are sent only in response to data packets,
1494 * so that this nexthop apparently is reachable. --ANK
1495 */
1496 dst_confirm(&rt->u.dst);
1497
1498 /* Duplicate redirect: silently ignore. */
1499 if (neigh == rt->u.dst.neighbour)
1500 goto out;
1501
1502 nrt = ip6_rt_copy(rt);
1503 if (nrt == NULL)
1504 goto out;
1505
1506 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1507 if (on_link)
1508 nrt->rt6i_flags &= ~RTF_GATEWAY;
1509
1510 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1511 nrt->rt6i_dst.plen = 128;
1512 nrt->u.dst.flags |= DST_HOST;
1513
1514 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1515 nrt->rt6i_nexthop = neigh_clone(neigh);
1516 /* Reset pmtu, it may be better */
1517 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1518 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1519
40e22e8f 1520 if (ip6_ins_rt(nrt))
1da177e4
LT
1521 goto out;
1522
8d71740c
TT
1523 netevent.old = &rt->u.dst;
1524 netevent.new = &nrt->u.dst;
1525 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1526
1da177e4 1527 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1528 ip6_del_rt(rt);
1da177e4
LT
1529 return;
1530 }
1531
1532out:
1ab1457c 1533 dst_release(&rt->u.dst);
1da177e4
LT
1534 return;
1535}
1536
1537/*
1538 * Handle ICMP "packet too big" messages
1539 * i.e. Path MTU discovery
1540 */
1541
1542void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1543 struct net_device *dev, u32 pmtu)
1544{
1545 struct rt6_info *rt, *nrt;
1546 int allfrag = 0;
1547
1548 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1549 if (rt == NULL)
1550 return;
1551
1552 if (pmtu >= dst_mtu(&rt->u.dst))
1553 goto out;
1554
1555 if (pmtu < IPV6_MIN_MTU) {
1556 /*
1ab1457c 1557 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1558 * MTU (1280) and a fragment header should always be included
1559 * after a node receiving Too Big message reporting PMTU is
1560 * less than the IPv6 Minimum Link MTU.
1561 */
1562 pmtu = IPV6_MIN_MTU;
1563 allfrag = 1;
1564 }
1565
1566 /* New mtu received -> path was valid.
1567 They are sent only in response to data packets,
1568 so that this nexthop apparently is reachable. --ANK
1569 */
1570 dst_confirm(&rt->u.dst);
1571
1572 /* Host route. If it is static, it would be better
1573 not to override it, but add new one, so that
1574 when cache entry will expire old pmtu
1575 would return automatically.
1576 */
1577 if (rt->rt6i_flags & RTF_CACHE) {
1578 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1579 if (allfrag)
1580 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1581 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1582 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1583 goto out;
1584 }
1585
1586 /* Network route.
1587 Two cases are possible:
1588 1. It is connected route. Action: COW
1589 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1590 */
d5315b50 1591 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1592 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1593 else
1594 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1595
d5315b50 1596 if (nrt) {
a1e78363
YH
1597 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1598 if (allfrag)
1599 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1600
1601 /* According to RFC 1981, detecting PMTU increase shouldn't be
1602 * happened within 5 mins, the recommended timer is 10 mins.
1603 * Here this route expiration time is set to ip6_rt_mtu_expires
1604 * which is 10 mins. After 10 mins the decreased pmtu is expired
1605 * and detecting PMTU increase will be automatically happened.
1606 */
1607 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1608 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1609
40e22e8f 1610 ip6_ins_rt(nrt);
1da177e4 1611 }
1da177e4
LT
1612out:
1613 dst_release(&rt->u.dst);
1614}
1615
1616/*
1617 * Misc support functions
1618 */
1619
1620static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1621{
1622 struct rt6_info *rt = ip6_dst_alloc();
1623
1624 if (rt) {
1625 rt->u.dst.input = ort->u.dst.input;
1626 rt->u.dst.output = ort->u.dst.output;
1627
1628 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1629 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1630 rt->u.dst.dev = ort->u.dst.dev;
1631 if (rt->u.dst.dev)
1632 dev_hold(rt->u.dst.dev);
1633 rt->rt6i_idev = ort->rt6i_idev;
1634 if (rt->rt6i_idev)
1635 in6_dev_hold(rt->rt6i_idev);
1636 rt->u.dst.lastuse = jiffies;
1637 rt->rt6i_expires = 0;
1638
1639 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1640 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1641 rt->rt6i_metric = 0;
1642
1643 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1644#ifdef CONFIG_IPV6_SUBTREES
1645 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1646#endif
c71099ac 1647 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1648 }
1649 return rt;
1650}
1651
70ceb4f5
YH
1652#ifdef CONFIG_IPV6_ROUTE_INFO
1653static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1654 struct in6_addr *gwaddr, int ifindex)
1655{
1656 struct fib6_node *fn;
1657 struct rt6_info *rt = NULL;
c71099ac
TG
1658 struct fib6_table *table;
1659
1660 table = fib6_get_table(RT6_TABLE_INFO);
1661 if (table == NULL)
1662 return NULL;
70ceb4f5 1663
c71099ac
TG
1664 write_lock_bh(&table->tb6_lock);
1665 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1666 if (!fn)
1667 goto out;
1668
7cc48263 1669 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1670 if (rt->rt6i_dev->ifindex != ifindex)
1671 continue;
1672 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1673 continue;
1674 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1675 continue;
1676 dst_hold(&rt->u.dst);
1677 break;
1678 }
1679out:
c71099ac 1680 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1681 return rt;
1682}
1683
1684static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1685 struct in6_addr *gwaddr, int ifindex,
1686 unsigned pref)
1687{
86872cb5
TG
1688 struct fib6_config cfg = {
1689 .fc_table = RT6_TABLE_INFO,
1690 .fc_metric = 1024,
1691 .fc_ifindex = ifindex,
1692 .fc_dst_len = prefixlen,
1693 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1694 RTF_UP | RTF_PREF(pref),
1695 };
1696
1697 ipv6_addr_copy(&cfg.fc_dst, prefix);
1698 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1699
e317da96
YH
1700 /* We should treat it as a default route if prefix length is 0. */
1701 if (!prefixlen)
86872cb5 1702 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1703
86872cb5 1704 ip6_route_add(&cfg);
70ceb4f5
YH
1705
1706 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1707}
1708#endif
1709
1da177e4 1710struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1711{
1da177e4 1712 struct rt6_info *rt;
c71099ac 1713 struct fib6_table *table;
1da177e4 1714
c71099ac
TG
1715 table = fib6_get_table(RT6_TABLE_DFLT);
1716 if (table == NULL)
1717 return NULL;
1da177e4 1718
c71099ac 1719 write_lock_bh(&table->tb6_lock);
7cc48263 1720 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1721 if (dev == rt->rt6i_dev &&
045927ff 1722 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1723 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1724 break;
1725 }
1726 if (rt)
1727 dst_hold(&rt->u.dst);
c71099ac 1728 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1729 return rt;
1730}
1731
1732struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1733 struct net_device *dev,
1734 unsigned int pref)
1da177e4 1735{
86872cb5
TG
1736 struct fib6_config cfg = {
1737 .fc_table = RT6_TABLE_DFLT,
1738 .fc_metric = 1024,
1739 .fc_ifindex = dev->ifindex,
1740 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1741 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1742 };
1da177e4 1743
86872cb5 1744 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1745
86872cb5 1746 ip6_route_add(&cfg);
1da177e4 1747
1da177e4
LT
1748 return rt6_get_dflt_router(gwaddr, dev);
1749}
1750
1751void rt6_purge_dflt_routers(void)
1752{
1753 struct rt6_info *rt;
c71099ac
TG
1754 struct fib6_table *table;
1755
1756 /* NOTE: Keep consistent with rt6_get_dflt_router */
1757 table = fib6_get_table(RT6_TABLE_DFLT);
1758 if (table == NULL)
1759 return;
1da177e4
LT
1760
1761restart:
c71099ac 1762 read_lock_bh(&table->tb6_lock);
7cc48263 1763 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1764 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1765 dst_hold(&rt->u.dst);
c71099ac 1766 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1767 ip6_del_rt(rt);
1da177e4
LT
1768 goto restart;
1769 }
1770 }
c71099ac 1771 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1772}
1773
86872cb5
TG
1774static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1775 struct fib6_config *cfg)
1776{
1777 memset(cfg, 0, sizeof(*cfg));
1778
1779 cfg->fc_table = RT6_TABLE_MAIN;
1780 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1781 cfg->fc_metric = rtmsg->rtmsg_metric;
1782 cfg->fc_expires = rtmsg->rtmsg_info;
1783 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1784 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1785 cfg->fc_flags = rtmsg->rtmsg_flags;
1786
1787 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1788 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1789 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1790}
1791
1da177e4
LT
1792int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1793{
86872cb5 1794 struct fib6_config cfg;
1da177e4
LT
1795 struct in6_rtmsg rtmsg;
1796 int err;
1797
1798 switch(cmd) {
1799 case SIOCADDRT: /* Add a route */
1800 case SIOCDELRT: /* Delete a route */
1801 if (!capable(CAP_NET_ADMIN))
1802 return -EPERM;
1803 err = copy_from_user(&rtmsg, arg,
1804 sizeof(struct in6_rtmsg));
1805 if (err)
1806 return -EFAULT;
86872cb5
TG
1807
1808 rtmsg_to_fib6_config(&rtmsg, &cfg);
1809
1da177e4
LT
1810 rtnl_lock();
1811 switch (cmd) {
1812 case SIOCADDRT:
86872cb5 1813 err = ip6_route_add(&cfg);
1da177e4
LT
1814 break;
1815 case SIOCDELRT:
86872cb5 1816 err = ip6_route_del(&cfg);
1da177e4
LT
1817 break;
1818 default:
1819 err = -EINVAL;
1820 }
1821 rtnl_unlock();
1822
1823 return err;
3ff50b79 1824 }
1da177e4
LT
1825
1826 return -EINVAL;
1827}
1828
1829/*
1830 * Drop the packet on the floor
1831 */
1832
612f09e8
YH
1833static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1834 int ipstats_mib_noroutes)
1da177e4 1835{
612f09e8
YH
1836 int type;
1837 switch (ipstats_mib_noroutes) {
1838 case IPSTATS_MIB_INNOROUTES:
0660e03f 1839 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1840 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1841 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1842 break;
1843 }
1844 /* FALLTHROUGH */
1845 case IPSTATS_MIB_OUTNOROUTES:
1846 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1847 break;
1848 }
9ce8ade0 1849 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1850 kfree_skb(skb);
1851 return 0;
1852}
1853
9ce8ade0
TG
1854static int ip6_pkt_discard(struct sk_buff *skb)
1855{
612f09e8 1856 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1857}
1858
20380731 1859static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1860{
1861 skb->dev = skb->dst->dev;
612f09e8 1862 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1863}
1864
6723ab54
DM
1865#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1866
9ce8ade0
TG
1867static int ip6_pkt_prohibit(struct sk_buff *skb)
1868{
612f09e8 1869 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1870}
1871
1872static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1873{
1874 skb->dev = skb->dst->dev;
612f09e8 1875 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1876}
1877
/*
 *	Black hole: free the packet silently (no ICMP, no counter).
 *	Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1883
6723ab54
DM
1884#endif
1885
1da177e4
LT
1886/*
1887 * Allocate a dst for local (unicast / anycast) address.
1888 */
1889
1890struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1891 const struct in6_addr *addr,
1892 int anycast)
1893{
1894 struct rt6_info *rt = ip6_dst_alloc();
1895
1896 if (rt == NULL)
1897 return ERR_PTR(-ENOMEM);
1898
1899 dev_hold(&loopback_dev);
1900 in6_dev_hold(idev);
1901
1902 rt->u.dst.flags = DST_HOST;
1903 rt->u.dst.input = ip6_input;
1904 rt->u.dst.output = ip6_output;
1905 rt->rt6i_dev = &loopback_dev;
1906 rt->rt6i_idev = idev;
1907 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1908 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1909 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1910 rt->u.dst.obsolete = -1;
1911
1912 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1913 if (anycast)
1914 rt->rt6i_flags |= RTF_ANYCAST;
1915 else
1da177e4
LT
1916 rt->rt6i_flags |= RTF_LOCAL;
1917 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1918 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1919 dst_free(&rt->u.dst);
1da177e4
LT
1920 return ERR_PTR(-ENOMEM);
1921 }
1922
1923 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1924 rt->rt6i_dst.plen = 128;
c71099ac 1925 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1926
1927 atomic_set(&rt->u.dst.__refcnt, 1);
1928
1929 return rt;
1930}
1931
1932static int fib6_ifdown(struct rt6_info *rt, void *arg)
1933{
1934 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1935 rt != &ip6_null_entry) {
1936 RT6_TRACE("deleted by ifdown %p\n", rt);
1937 return -1;
1938 }
1939 return 0;
1940}
1941
1942void rt6_ifdown(struct net_device *dev)
1943{
c71099ac 1944 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1945}
1946
/* Argument block handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* its new MTU */
};
1952
1953static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1954{
1955 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1956 struct inet6_dev *idev;
1957
1958 /* In IPv6 pmtu discovery is not optional,
1959 so that RTAX_MTU lock cannot disable it.
1960 We still use this lock to block changes
1961 caused by addrconf/ndisc.
1962 */
1963
1964 idev = __in6_dev_get(arg->dev);
1965 if (idev == NULL)
1966 return 0;
1967
1968 /* For administrative MTU increase, there is no way to discover
1969 IPv6 PMTU increase, so PMTU increase should be updated here.
1970 Since RFC 1981 doesn't include administrative MTU increase
1971 update PMTU increase is a MUST. (i.e. jumbo frame)
1972 */
1973 /*
1974 If new MTU is less than route PMTU, this new MTU will be the
1975 lowest MTU in the path, update the route PMTU to reflect PMTU
1976 decreases; if new MTU is greater than route PMTU, and the
1977 old MTU is the lowest MTU in the path, update the route PMTU
1978 to reflect the increase. In this case if the other nodes' MTU
1979 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1980 PMTU discouvery.
1981 */
1982 if (rt->rt6i_dev == arg->dev &&
1983 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1ab1457c
YH
1984 (dst_mtu(&rt->u.dst) > arg->mtu ||
1985 (dst_mtu(&rt->u.dst) < arg->mtu &&
1da177e4
LT
1986 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1987 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1988 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1989 return 0;
1990}
1991
1992void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1993{
c71099ac
TG
1994 struct rt6_mtu_change_arg arg = {
1995 .dev = dev,
1996 .mtu = mtu,
1997 };
1da177e4 1998
c71099ac 1999 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2000}
2001
ef7c79ed 2002static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2003 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2004 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2005 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2006 [RTA_PRIORITY] = { .type = NLA_U32 },
2007 [RTA_METRICS] = { .type = NLA_NESTED },
2008};
2009
2010static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2011 struct fib6_config *cfg)
1da177e4 2012{
86872cb5
TG
2013 struct rtmsg *rtm;
2014 struct nlattr *tb[RTA_MAX+1];
2015 int err;
1da177e4 2016
86872cb5
TG
2017 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2018 if (err < 0)
2019 goto errout;
1da177e4 2020
86872cb5
TG
2021 err = -EINVAL;
2022 rtm = nlmsg_data(nlh);
2023 memset(cfg, 0, sizeof(*cfg));
2024
2025 cfg->fc_table = rtm->rtm_table;
2026 cfg->fc_dst_len = rtm->rtm_dst_len;
2027 cfg->fc_src_len = rtm->rtm_src_len;
2028 cfg->fc_flags = RTF_UP;
2029 cfg->fc_protocol = rtm->rtm_protocol;
2030
2031 if (rtm->rtm_type == RTN_UNREACHABLE)
2032 cfg->fc_flags |= RTF_REJECT;
2033
2034 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2035 cfg->fc_nlinfo.nlh = nlh;
2036
2037 if (tb[RTA_GATEWAY]) {
2038 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2039 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2040 }
86872cb5
TG
2041
2042 if (tb[RTA_DST]) {
2043 int plen = (rtm->rtm_dst_len + 7) >> 3;
2044
2045 if (nla_len(tb[RTA_DST]) < plen)
2046 goto errout;
2047
2048 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2049 }
86872cb5
TG
2050
2051 if (tb[RTA_SRC]) {
2052 int plen = (rtm->rtm_src_len + 7) >> 3;
2053
2054 if (nla_len(tb[RTA_SRC]) < plen)
2055 goto errout;
2056
2057 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2058 }
86872cb5
TG
2059
2060 if (tb[RTA_OIF])
2061 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2062
2063 if (tb[RTA_PRIORITY])
2064 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2065
2066 if (tb[RTA_METRICS]) {
2067 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2068 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2069 }
86872cb5
TG
2070
2071 if (tb[RTA_TABLE])
2072 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2073
2074 err = 0;
2075errout:
2076 return err;
1da177e4
LT
2077}
2078
c127ea2c 2079static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2080{
86872cb5
TG
2081 struct fib6_config cfg;
2082 int err;
1da177e4 2083
86872cb5
TG
2084 err = rtm_to_fib6_config(skb, nlh, &cfg);
2085 if (err < 0)
2086 return err;
2087
2088 return ip6_route_del(&cfg);
1da177e4
LT
2089}
2090
c127ea2c 2091static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2092{
86872cb5
TG
2093 struct fib6_config cfg;
2094 int err;
1da177e4 2095
86872cb5
TG
2096 err = rtm_to_fib6_config(skb, nlh, &cfg);
2097 if (err < 0)
2098 return err;
2099
2100 return ip6_route_add(&cfg);
1da177e4
LT
2101}
2102
339bf98f
TG
2103static inline size_t rt6_nlmsg_size(void)
2104{
2105 return NLMSG_ALIGN(sizeof(struct rtmsg))
2106 + nla_total_size(16) /* RTA_SRC */
2107 + nla_total_size(16) /* RTA_DST */
2108 + nla_total_size(16) /* RTA_GATEWAY */
2109 + nla_total_size(16) /* RTA_PREFSRC */
2110 + nla_total_size(4) /* RTA_TABLE */
2111 + nla_total_size(4) /* RTA_IIF */
2112 + nla_total_size(4) /* RTA_OIF */
2113 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2114 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2115 + nla_total_size(sizeof(struct rta_cacheinfo));
2116}
2117
1da177e4 2118static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2119 struct in6_addr *dst, struct in6_addr *src,
2120 int iif, int type, u32 pid, u32 seq,
2121 int prefix, unsigned int flags)
1da177e4
LT
2122{
2123 struct rtmsg *rtm;
2d7202bf 2124 struct nlmsghdr *nlh;
e3703b3d 2125 long expires;
9e762a4a 2126 u32 table;
1da177e4
LT
2127
2128 if (prefix) { /* user wants prefix routes only */
2129 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2130 /* success since this is not a prefix route */
2131 return 1;
2132 }
2133 }
2134
2d7202bf
TG
2135 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2136 if (nlh == NULL)
26932566 2137 return -EMSGSIZE;
2d7202bf
TG
2138
2139 rtm = nlmsg_data(nlh);
1da177e4
LT
2140 rtm->rtm_family = AF_INET6;
2141 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2142 rtm->rtm_src_len = rt->rt6i_src.plen;
2143 rtm->rtm_tos = 0;
c71099ac 2144 if (rt->rt6i_table)
9e762a4a 2145 table = rt->rt6i_table->tb6_id;
c71099ac 2146 else
9e762a4a
PM
2147 table = RT6_TABLE_UNSPEC;
2148 rtm->rtm_table = table;
2d7202bf 2149 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2150 if (rt->rt6i_flags&RTF_REJECT)
2151 rtm->rtm_type = RTN_UNREACHABLE;
2152 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2153 rtm->rtm_type = RTN_LOCAL;
2154 else
2155 rtm->rtm_type = RTN_UNICAST;
2156 rtm->rtm_flags = 0;
2157 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2158 rtm->rtm_protocol = rt->rt6i_protocol;
2159 if (rt->rt6i_flags&RTF_DYNAMIC)
2160 rtm->rtm_protocol = RTPROT_REDIRECT;
2161 else if (rt->rt6i_flags & RTF_ADDRCONF)
2162 rtm->rtm_protocol = RTPROT_KERNEL;
2163 else if (rt->rt6i_flags&RTF_DEFAULT)
2164 rtm->rtm_protocol = RTPROT_RA;
2165
2166 if (rt->rt6i_flags&RTF_CACHE)
2167 rtm->rtm_flags |= RTM_F_CLONED;
2168
2169 if (dst) {
2d7202bf 2170 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2171 rtm->rtm_dst_len = 128;
1da177e4 2172 } else if (rtm->rtm_dst_len)
2d7202bf 2173 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2174#ifdef CONFIG_IPV6_SUBTREES
2175 if (src) {
2d7202bf 2176 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2177 rtm->rtm_src_len = 128;
1da177e4 2178 } else if (rtm->rtm_src_len)
2d7202bf 2179 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2180#endif
2181 if (iif)
2d7202bf 2182 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2183 else if (dst) {
2184 struct in6_addr saddr_buf;
2185 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2186 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2187 }
2d7202bf 2188
1da177e4 2189 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2190 goto nla_put_failure;
2191
1da177e4 2192 if (rt->u.dst.neighbour)
2d7202bf
TG
2193 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2194
1da177e4 2195 if (rt->u.dst.dev)
2d7202bf
TG
2196 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2197
2198 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2199
2200 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2201 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2202 expires, rt->u.dst.error) < 0)
2203 goto nla_put_failure;
2d7202bf
TG
2204
2205 return nlmsg_end(skb, nlh);
2206
2207nla_put_failure:
26932566
PM
2208 nlmsg_cancel(skb, nlh);
2209 return -EMSGSIZE;
1da177e4
LT
2210}
2211
1b43af54 2212int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2213{
2214 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2215 int prefix;
2216
2d7202bf
TG
2217 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2218 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2219 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2220 } else
2221 prefix = 0;
2222
2223 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2224 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2225 prefix, NLM_F_MULTI);
1da177e4
LT
2226}
2227
c127ea2c 2228static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2229{
ab364a6f
TG
2230 struct nlattr *tb[RTA_MAX+1];
2231 struct rt6_info *rt;
1da177e4 2232 struct sk_buff *skb;
ab364a6f 2233 struct rtmsg *rtm;
1da177e4 2234 struct flowi fl;
ab364a6f 2235 int err, iif = 0;
1da177e4 2236
ab364a6f
TG
2237 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2238 if (err < 0)
2239 goto errout;
1da177e4 2240
ab364a6f 2241 err = -EINVAL;
1da177e4 2242 memset(&fl, 0, sizeof(fl));
1da177e4 2243
ab364a6f
TG
2244 if (tb[RTA_SRC]) {
2245 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2246 goto errout;
2247
2248 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2249 }
2250
2251 if (tb[RTA_DST]) {
2252 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2253 goto errout;
2254
2255 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2256 }
2257
2258 if (tb[RTA_IIF])
2259 iif = nla_get_u32(tb[RTA_IIF]);
2260
2261 if (tb[RTA_OIF])
2262 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2263
2264 if (iif) {
2265 struct net_device *dev;
2266 dev = __dev_get_by_index(iif);
2267 if (!dev) {
2268 err = -ENODEV;
ab364a6f 2269 goto errout;
1da177e4
LT
2270 }
2271 }
2272
ab364a6f
TG
2273 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2274 if (skb == NULL) {
2275 err = -ENOBUFS;
2276 goto errout;
2277 }
1da177e4 2278
ab364a6f
TG
2279 /* Reserve room for dummy headers, this skb can pass
2280 through good chunk of routing engine.
2281 */
459a98ed 2282 skb_reset_mac_header(skb);
ab364a6f 2283 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2284
ab364a6f 2285 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2286 skb->dst = &rt->u.dst;
2287
ab364a6f 2288 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2289 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2290 nlh->nlmsg_seq, 0, 0);
1da177e4 2291 if (err < 0) {
ab364a6f
TG
2292 kfree_skb(skb);
2293 goto errout;
1da177e4
LT
2294 }
2295
2942e900 2296 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2297errout:
1da177e4 2298 return err;
1da177e4
LT
2299}
2300
86872cb5 2301void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2302{
2303 struct sk_buff *skb;
86872cb5
TG
2304 u32 pid = 0, seq = 0;
2305 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2306 int err = -ENOBUFS;
2307
86872cb5
TG
2308 if (info) {
2309 pid = info->pid;
2310 nlh = info->nlh;
2311 if (nlh)
2312 seq = nlh->nlmsg_seq;
2313 }
2314
339bf98f 2315 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2316 if (skb == NULL)
2317 goto errout;
2318
2319 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
26932566
PM
2320 if (err < 0) {
2321 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2322 WARN_ON(err == -EMSGSIZE);
2323 kfree_skb(skb);
2324 goto errout;
2325 }
21713ebc
TG
2326 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2327errout:
2328 if (err < 0)
2329 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2330}
2331
2332/*
2333 * /proc
2334 */
2335
2336#ifdef CONFIG_PROC_FS
2337
/*
 * Nominal size in bytes of one formatted route record; used to convert
 * the read offset into a number of records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer being filled */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach @offset */
	int len;	/* bytes written to @buffer so far */
};
2348
2349static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2350{
2351 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1da177e4
LT
2352
2353 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2354 arg->skip++;
2355 return 0;
2356 }
2357
2358 if (arg->len >= arg->length)
2359 return 0;
2360
33e93c96
YH
2361 arg->len += sprintf(arg->buffer + arg->len,
2362 NIP6_SEQFMT " %02x ",
2363 NIP6(rt->rt6i_dst.addr),
1da177e4
LT
2364 rt->rt6i_dst.plen);
2365
2366#ifdef CONFIG_IPV6_SUBTREES
33e93c96
YH
2367 arg->len += sprintf(arg->buffer + arg->len,
2368 NIP6_SEQFMT " %02x ",
2369 NIP6(rt->rt6i_src.addr),
1da177e4
LT
2370 rt->rt6i_src.plen);
2371#else
33e93c96
YH
2372 arg->len += sprintf(arg->buffer + arg->len,
2373 "00000000000000000000000000000000 00 ");
1da177e4
LT
2374#endif
2375
2376 if (rt->rt6i_nexthop) {
33e93c96
YH
2377 arg->len += sprintf(arg->buffer + arg->len,
2378 NIP6_SEQFMT,
2379 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2380 } else {
33e93c96
YH
2381 arg->len += sprintf(arg->buffer + arg->len,
2382 "00000000000000000000000000000000");
1da177e4
LT
2383 }
2384 arg->len += sprintf(arg->buffer + arg->len,
2385 " %08x %08x %08x %08x %8s\n",
2386 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1ab1457c 2387 rt->u.dst.__use, rt->rt6i_flags,
1da177e4
LT
2388 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2389 return 0;
2390}
2391
2392static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2393{
c71099ac
TG
2394 struct rt6_proc_arg arg = {
2395 .buffer = buffer,
2396 .offset = offset,
2397 .length = length,
2398 };
1da177e4 2399
c71099ac 2400 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2401
2402 *start = buffer;
2403 if (offset)
2404 *start += offset % RT6_INFO_LEN;
2405
2406 arg.len -= offset % RT6_INFO_LEN;
2407
2408 if (arg.len > length)
2409 arg.len = length;
2410 if (arg.len < 0)
2411 arg.len = 0;
2412
2413 return arg.len;
2414}
2415
1da177e4
LT
2416static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2417{
2418 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2419 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2420 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2421 rt6_stats.fib_rt_cache,
2422 atomic_read(&ip6_dst_ops.entries),
2423 rt6_stats.fib_discarded_routes);
2424
2425 return 0;
2426}
2427
2428static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2429{
2430 return single_open(file, rt6_stats_seq_show, NULL);
2431}
2432
9a32144e 2433static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2434 .owner = THIS_MODULE,
2435 .open = rt6_stats_seq_open,
2436 .read = seq_read,
2437 .llseek = seq_lseek,
2438 .release = single_release,
2439};
2440#endif /* CONFIG_PROC_FS */
2441
2442#ifdef CONFIG_SYSCTL
2443
2444static int flush_delay;
2445
2446static
2447int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2448 void __user *buffer, size_t *lenp, loff_t *ppos)
2449{
2450 if (write) {
2451 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2452 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2453 return 0;
2454 } else
2455 return -EINVAL;
2456}
2457
2458ctl_table ipv6_route_table[] = {
1ab1457c
YH
2459 {
2460 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1da177e4 2461 .procname = "flush",
1ab1457c 2462 .data = &flush_delay,
1da177e4 2463 .maxlen = sizeof(int),
89c8b3a1 2464 .mode = 0200,
1ab1457c 2465 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2466 },
2467 {
2468 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2469 .procname = "gc_thresh",
1ab1457c 2470 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2471 .maxlen = sizeof(int),
2472 .mode = 0644,
1ab1457c 2473 .proc_handler = &proc_dointvec,
1da177e4
LT
2474 },
2475 {
2476 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2477 .procname = "max_size",
1ab1457c 2478 .data = &ip6_rt_max_size,
1da177e4
LT
2479 .maxlen = sizeof(int),
2480 .mode = 0644,
1ab1457c 2481 .proc_handler = &proc_dointvec,
1da177e4
LT
2482 },
2483 {
2484 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2485 .procname = "gc_min_interval",
1ab1457c 2486 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2487 .maxlen = sizeof(int),
2488 .mode = 0644,
1ab1457c 2489 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2490 .strategy = &sysctl_jiffies,
2491 },
2492 {
2493 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2494 .procname = "gc_timeout",
1ab1457c 2495 .data = &ip6_rt_gc_timeout,
1da177e4
LT
2496 .maxlen = sizeof(int),
2497 .mode = 0644,
1ab1457c 2498 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2499 .strategy = &sysctl_jiffies,
2500 },
2501 {
2502 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2503 .procname = "gc_interval",
1ab1457c 2504 .data = &ip6_rt_gc_interval,
1da177e4
LT
2505 .maxlen = sizeof(int),
2506 .mode = 0644,
1ab1457c 2507 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2508 .strategy = &sysctl_jiffies,
2509 },
2510 {
2511 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2512 .procname = "gc_elasticity",
1ab1457c 2513 .data = &ip6_rt_gc_elasticity,
1da177e4
LT
2514 .maxlen = sizeof(int),
2515 .mode = 0644,
1ab1457c 2516 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2517 .strategy = &sysctl_jiffies,
2518 },
2519 {
2520 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2521 .procname = "mtu_expires",
1ab1457c 2522 .data = &ip6_rt_mtu_expires,
1da177e4
LT
2523 .maxlen = sizeof(int),
2524 .mode = 0644,
1ab1457c 2525 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2526 .strategy = &sysctl_jiffies,
2527 },
2528 {
2529 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2530 .procname = "min_adv_mss",
1ab1457c 2531 .data = &ip6_rt_min_advmss,
1da177e4
LT
2532 .maxlen = sizeof(int),
2533 .mode = 0644,
1ab1457c 2534 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2535 .strategy = &sysctl_jiffies,
2536 },
2537 {
2538 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2539 .procname = "gc_min_interval_ms",
1ab1457c 2540 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2541 .maxlen = sizeof(int),
2542 .mode = 0644,
1ab1457c 2543 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2544 .strategy = &sysctl_ms_jiffies,
2545 },
2546 { .ctl_name = 0 }
2547};
2548
2549#endif
2550
2551void __init ip6_route_init(void)
2552{
952a10be 2553#ifdef CONFIG_PROC_FS
1da177e4 2554 struct proc_dir_entry *p;
952a10be 2555#endif
e5d679f3
AD
2556 ip6_dst_ops.kmem_cachep =
2557 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
20c2df83 2558 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
14e50e57
DM
2559 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2560
1da177e4
LT
2561 fib6_init();
2562#ifdef CONFIG_PROC_FS
2563 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2564 if (p)
2565 p->owner = THIS_MODULE;
2566
2567 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2568#endif
2569#ifdef CONFIG_XFRM
2570 xfrm6_init();
2571#endif
101367c2
TG
2572#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2573 fib6_rules_init();
2574#endif
c127ea2c
TG
2575
2576 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2577 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2578 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
1da177e4
LT
2579}
2580
2581void ip6_route_cleanup(void)
2582{
101367c2
TG
2583#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2584 fib6_rules_cleanup();
2585#endif
1da177e4
LT
2586#ifdef CONFIG_PROC_FS
2587 proc_net_remove("ipv6_route");
2588 proc_net_remove("rt6_stats");
2589#endif
2590#ifdef CONFIG_XFRM
2591 xfrm6_fini();
2592#endif
2593 rt6_ifdown(NULL);
2594 fib6_gc_cleanup();
2595 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2596}