]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NET]: Add a might_sleep() to dev_close().
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4
LT
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
/*
 * Debug level for this file.  Raising it to 3 turns RDBG() and
 * RT6_TRACE() into printk() calls; below that both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* When non-zero, the policy lookup paths clone routes via rt6_alloc_clone(). */
#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
79
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); defined later in this file. */
static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif
108
1da177e4
LT
109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
14e50e57
DM
123static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124{
125}
126
127static struct dst_ops ip6_dst_blackhole_ops = {
128 .family = AF_INET6,
129 .protocol = __constant_htons(ETH_P_IPV6),
130 .destroy = ip6_dst_destroy,
131 .check = ip6_dst_check,
132 .update_pmtu = ip6_rt_blackhole_update_pmtu,
133 .entry_size = sizeof(struct rt6_info),
134};
135
1da177e4
LT
136struct rt6_info ip6_null_entry = {
137 .u = {
138 .dst = {
139 .__refcnt = ATOMIC_INIT(1),
140 .__use = 1,
141 .dev = &loopback_dev,
142 .obsolete = -1,
143 .error = -ENETUNREACH,
144 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
145 .input = ip6_pkt_discard,
146 .output = ip6_pkt_discard_out,
147 .ops = &ip6_dst_ops,
148 .path = (struct dst_entry*)&ip6_null_entry,
149 }
150 },
151 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
152 .rt6i_metric = ~(u32) 0,
153 .rt6i_ref = ATOMIC_INIT(1),
154};
155
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/* Static reject route whose dst carries -EACCES. */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Static reject route whose dst carries -EINVAL; same handler both ways. */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
203
1da177e4
LT
204/* allocate dst with ip6_dst_ops */
205static __inline__ struct rt6_info *ip6_dst_alloc(void)
206{
207 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
208}
209
210static void ip6_dst_destroy(struct dst_entry *dst)
211{
212 struct rt6_info *rt = (struct rt6_info *)dst;
213 struct inet6_dev *idev = rt->rt6i_idev;
214
215 if (idev != NULL) {
216 rt->rt6i_idev = NULL;
217 in6_dev_put(idev);
1ab1457c 218 }
1da177e4
LT
219}
220
221static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
222 int how)
223{
224 struct rt6_info *rt = (struct rt6_info *)dst;
225 struct inet6_dev *idev = rt->rt6i_idev;
226
227 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
228 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
229 if (loopback_idev != NULL) {
230 rt->rt6i_idev = loopback_idev;
231 in6_dev_put(idev);
232 }
233 }
234}
235
236static __inline__ int rt6_check_expired(const struct rt6_info *rt)
237{
238 return (rt->rt6i_flags & RTF_EXPIRES &&
239 time_after(jiffies, rt->rt6i_expires));
240}
241
c71099ac
TG
242static inline int rt6_need_strict(struct in6_addr *daddr)
243{
244 return (ipv6_addr_type(daddr) &
245 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
246}
247
1da177e4 248/*
c71099ac 249 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
250 */
251
252static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
253 int oif,
254 int strict)
255{
256 struct rt6_info *local = NULL;
257 struct rt6_info *sprt;
258
259 if (oif) {
7cc48263 260 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
261 struct net_device *dev = sprt->rt6i_dev;
262 if (dev->ifindex == oif)
263 return sprt;
264 if (dev->flags & IFF_LOOPBACK) {
265 if (sprt->rt6i_idev == NULL ||
266 sprt->rt6i_idev->dev->ifindex != oif) {
267 if (strict && oif)
268 continue;
1ab1457c 269 if (local && (!oif ||
1da177e4
LT
270 local->rt6i_idev->dev->ifindex == oif))
271 continue;
272 }
273 local = sprt;
274 }
275 }
276
277 if (local)
278 return local;
279
280 if (strict)
281 return &ip6_null_entry;
282 }
283 return rt;
284}
285
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * next hop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * Probes MUST be rate-limited; one is sent only if more than
 * cnf.rtr_probe_interval jiffies have passed since neigh->updated.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing.  Holding it only for reading would let two concurrent callers
 * both pass the interval check and both send a probe.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int send_probe = 0;

	/* Cheap unlocked check first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = 1;
	}
	write_unlock_bh(&neigh->lock);

	if (!send_probe)
		return;

	/* The solicitation is sent with the neighbour lock released. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
321
1da177e4 322/*
554cfb7e 323 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 324 */
b6f99a21 325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
326{
327 struct net_device *dev = rt->rt6i_dev;
161980f4 328 if (!oif || dev->ifindex == oif)
554cfb7e 329 return 2;
161980f4
DM
330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
554cfb7e 334}
1da177e4 335
b6f99a21 336static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 337{
554cfb7e
YH
338 struct neighbour *neigh = rt->rt6i_nexthop;
339 int m = 0;
4d0c5911
YH
340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
554cfb7e
YH
344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
4d0c5911 346 m = 2;
ea73ee23
YH
347 else if (!(neigh->nud_state & NUD_FAILED))
348 m = 1;
554cfb7e 349 read_unlock_bh(&neigh->lock);
1da177e4 350 }
554cfb7e 351 return m;
1da177e4
LT
352}
353
554cfb7e
YH
354static int rt6_score_route(struct rt6_info *rt, int oif,
355 int strict)
1da177e4 356{
4d0c5911 357 int m, n;
1ab1457c 358
4d0c5911 359 m = rt6_check_dev(rt, oif);
77d16f45 360 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 361 return -1;
ebacaaa0
YH
362#ifdef CONFIG_IPV6_ROUTER_PREF
363 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
364#endif
4d0c5911 365 n = rt6_check_neigh(rt);
557e92ef 366 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
367 return -1;
368 return m;
369}
370
f11e6659
DM
371static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
372 int *mpri, struct rt6_info *match)
554cfb7e 373{
f11e6659
DM
374 int m;
375
376 if (rt6_check_expired(rt))
377 goto out;
378
379 m = rt6_score_route(rt, oif, strict);
380 if (m < 0)
381 goto out;
382
383 if (m > *mpri) {
384 if (strict & RT6_LOOKUP_F_REACHABLE)
385 rt6_probe(match);
386 *mpri = m;
387 match = rt;
388 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
389 rt6_probe(rt);
390 }
391
392out:
393 return match;
394}
395
396static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
397 struct rt6_info *rr_head,
398 u32 metric, int oif, int strict)
399{
400 struct rt6_info *rt, *match;
554cfb7e 401 int mpri = -1;
1da177e4 402
f11e6659
DM
403 match = NULL;
404 for (rt = rr_head; rt && rt->rt6i_metric == metric;
405 rt = rt->u.dst.rt6_next)
406 match = find_match(rt, oif, strict, &mpri, match);
407 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
408 rt = rt->u.dst.rt6_next)
409 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 410
f11e6659
DM
411 return match;
412}
1da177e4 413
f11e6659
DM
414static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
415{
416 struct rt6_info *match, *rt0;
1da177e4 417
f11e6659
DM
418 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
419 __FUNCTION__, fn->leaf, oif);
554cfb7e 420
f11e6659
DM
421 rt0 = fn->rr_ptr;
422 if (!rt0)
423 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 424
f11e6659 425 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 426
554cfb7e 427 if (!match &&
f11e6659
DM
428 (strict & RT6_LOOKUP_F_REACHABLE)) {
429 struct rt6_info *next = rt0->u.dst.rt6_next;
430
554cfb7e 431 /* no entries matched; do round-robin */
f11e6659
DM
432 if (!next || next->rt6i_metric != rt0->rt6i_metric)
433 next = fn->leaf;
434
435 if (next != rt0)
436 fn->rr_ptr = next;
1da177e4 437 }
1da177e4 438
f11e6659
DM
439 RT6_TRACE("%s() => %p\n",
440 __FUNCTION__, match);
1da177e4 441
554cfb7e 442 return (match ? match : &ip6_null_entry);
1da177e4
LT
443}
444
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * A zero lifetime deletes an existing matching route; otherwise the route
 * is created or its preference and expiry are refreshed.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length (RFC 4191, 2.3).
	 * Length counts 8-octet units including the 8-octet header, so the
	 * prefix field holds 0, 8 or 16 octets for Length 1, 2 or 3.  A
	 * prefix longer than 64 bits therefore needs Length 3 and any
	 * non-empty prefix needs at least Length 2; accepting less would
	 * read the prefix from beyond the end of the option.
	 */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Only prefix_len bits are taken from the option. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
522
982f56f3
YH
/*
 * Used by the policy lookup functions when the selected route is
 * &ip6_null_entry: climb from the current node towards the tree root,
 * descending into a parent's source-address subtree where one exists,
 * until a node carrying route info is found.
 *
 * Expects locals 'rt' and 'fn' and labels 'out' and 'restart' in the
 * caller: jumps to 'out' on reaching the top-level root and to 'restart'
 * once 'fn' points at a node flagged RTN_RTINFO.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
540
541static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542 struct flowi *fl, int flags)
1da177e4
LT
543{
544 struct fib6_node *fn;
545 struct rt6_info *rt;
546
c71099ac
TG
547 read_lock_bh(&table->tb6_lock);
548 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549restart:
550 rt = fn->leaf;
77d16f45 551 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 552 BACKTRACK(&fl->fl6_src);
c71099ac 553out:
33cc4896 554 dst_hold(&rt->u.dst);
c71099ac 555 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
556
557 rt->u.dst.lastuse = jiffies;
c71099ac
TG
558 rt->u.dst.__use++;
559
560 return rt;
561
562}
563
564struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
565 int oif, int strict)
566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
c71099ac
TG
572 },
573 },
574 };
575 struct dst_entry *dst;
77d16f45 576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 577
adaa70bb
TG
578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
c71099ac
TG
583 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
1da177e4
LT
589 return NULL;
590}
591
7159039a
YH
592EXPORT_SYMBOL(rt6_lookup);
593
c71099ac 594/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
598 */
599
86872cb5 600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
601{
602 int err;
c71099ac 603 struct fib6_table *table;
1da177e4 604
c71099ac
TG
605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
86872cb5 607 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 608 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
609
610 return err;
611}
612
40e22e8f
TG
613int ip6_ins_rt(struct rt6_info *rt)
614{
86872cb5 615 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
616}
617
95a9a5ba
YH
618static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
1da177e4 620{
1da177e4
LT
621 struct rt6_info *rt;
622
623 /*
624 * Clone the route.
625 */
626
627 rt = ip6_rt_copy(ort);
628
629 if (rt) {
58c4fb86
YH
630 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 if (rt->rt6i_dst.plen != 128 &&
632 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 634 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 635 }
1da177e4 636
58c4fb86 637 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
638 rt->rt6i_dst.plen = 128;
639 rt->rt6i_flags |= RTF_CACHE;
640 rt->u.dst.flags |= DST_HOST;
641
642#ifdef CONFIG_IPV6_SUBTREES
643 if (rt->rt6i_src.plen && saddr) {
644 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645 rt->rt6i_src.plen = 128;
646 }
647#endif
648
649 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
650
95a9a5ba 651 }
1da177e4 652
95a9a5ba
YH
653 return rt;
654}
1da177e4 655
299d9939
YH
656static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
657{
658 struct rt6_info *rt = ip6_rt_copy(ort);
659 if (rt) {
660 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
661 rt->rt6i_dst.plen = 128;
662 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
663 rt->u.dst.flags |= DST_HOST;
664 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
665 }
666 return rt;
667}
668
8ce11e6a
AB
669static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
670 struct flowi *fl, int flags)
1da177e4
LT
671{
672 struct fib6_node *fn;
519fbd87 673 struct rt6_info *rt, *nrt;
c71099ac 674 int strict = 0;
1da177e4 675 int attempts = 3;
519fbd87 676 int err;
ea659e07 677 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 678
77d16f45 679 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
680
681relookup:
c71099ac 682 read_lock_bh(&table->tb6_lock);
1da177e4 683
8238dd06 684restart_2:
c71099ac 685 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
686
687restart:
f11e6659 688 rt = rt6_select(fn, fl->iif, strict | reachable);
982f56f3 689 BACKTRACK(&fl->fl6_src);
8238dd06
YH
690 if (rt == &ip6_null_entry ||
691 rt->rt6i_flags & RTF_CACHE)
1ddef044 692 goto out;
1da177e4 693
fb9de91e 694 dst_hold(&rt->u.dst);
c71099ac 695 read_unlock_bh(&table->tb6_lock);
fb9de91e 696
519fbd87 697 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 698 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
699 else {
700#if CLONE_OFFLINK_ROUTE
c71099ac 701 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
702#else
703 goto out2;
704#endif
705 }
e40cf353 706
519fbd87
YH
707 dst_release(&rt->u.dst);
708 rt = nrt ? : &ip6_null_entry;
1da177e4 709
519fbd87
YH
710 dst_hold(&rt->u.dst);
711 if (nrt) {
40e22e8f 712 err = ip6_ins_rt(nrt);
519fbd87 713 if (!err)
1da177e4 714 goto out2;
1da177e4 715 }
1da177e4 716
519fbd87
YH
717 if (--attempts <= 0)
718 goto out2;
719
720 /*
c71099ac 721 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
722 * released someone could insert this route. Relookup.
723 */
724 dst_release(&rt->u.dst);
725 goto relookup;
726
727out:
8238dd06
YH
728 if (reachable) {
729 reachable = 0;
730 goto restart_2;
731 }
519fbd87 732 dst_hold(&rt->u.dst);
c71099ac 733 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
734out2:
735 rt->u.dst.lastuse = jiffies;
736 rt->u.dst.__use++;
c71099ac
TG
737
738 return rt;
1da177e4
LT
739}
740
c71099ac
TG
741void ip6_route_input(struct sk_buff *skb)
742{
0660e03f 743 struct ipv6hdr *iph = ipv6_hdr(skb);
adaa70bb 744 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
745 struct flowi fl = {
746 .iif = skb->dev->ifindex,
747 .nl_u = {
748 .ip6_u = {
749 .daddr = iph->daddr,
750 .saddr = iph->saddr,
90bcaf7b 751 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
752 },
753 },
1ab1457c 754 .mark = skb->mark,
c71099ac
TG
755 .proto = iph->nexthdr,
756 };
adaa70bb
TG
757
758 if (rt6_need_strict(&iph->daddr))
759 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
760
761 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
762}
763
764static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
765 struct flowi *fl, int flags)
1da177e4
LT
766{
767 struct fib6_node *fn;
519fbd87 768 struct rt6_info *rt, *nrt;
c71099ac 769 int strict = 0;
1da177e4 770 int attempts = 3;
519fbd87 771 int err;
ea659e07 772 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 773
77d16f45 774 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
775
776relookup:
c71099ac 777 read_lock_bh(&table->tb6_lock);
1da177e4 778
8238dd06 779restart_2:
c71099ac 780 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
781
782restart:
f11e6659 783 rt = rt6_select(fn, fl->oif, strict | reachable);
982f56f3 784 BACKTRACK(&fl->fl6_src);
8238dd06
YH
785 if (rt == &ip6_null_entry ||
786 rt->rt6i_flags & RTF_CACHE)
1da177e4 787 goto out;
1da177e4 788
fb9de91e 789 dst_hold(&rt->u.dst);
c71099ac 790 read_unlock_bh(&table->tb6_lock);
fb9de91e 791
519fbd87 792 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 793 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
794 else {
795#if CLONE_OFFLINK_ROUTE
796 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
797#else
798 goto out2;
799#endif
800 }
1da177e4 801
519fbd87
YH
802 dst_release(&rt->u.dst);
803 rt = nrt ? : &ip6_null_entry;
1da177e4 804
519fbd87
YH
805 dst_hold(&rt->u.dst);
806 if (nrt) {
40e22e8f 807 err = ip6_ins_rt(nrt);
519fbd87 808 if (!err)
1da177e4 809 goto out2;
1da177e4 810 }
e40cf353 811
519fbd87
YH
812 if (--attempts <= 0)
813 goto out2;
814
815 /*
c71099ac 816 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
817 * released someone could insert this route. Relookup.
818 */
819 dst_release(&rt->u.dst);
820 goto relookup;
821
822out:
8238dd06
YH
823 if (reachable) {
824 reachable = 0;
825 goto restart_2;
826 }
519fbd87 827 dst_hold(&rt->u.dst);
c71099ac 828 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
829out2:
830 rt->u.dst.lastuse = jiffies;
831 rt->u.dst.__use++;
c71099ac
TG
832 return rt;
833}
834
835struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
836{
837 int flags = 0;
838
839 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 840 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 841
adaa70bb
TG
842 if (!ipv6_addr_any(&fl->fl6_src))
843 flags |= RT6_LOOKUP_F_HAS_SADDR;
844
c71099ac 845 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
846}
847
7159039a 848EXPORT_SYMBOL(ip6_route_output);
1da177e4 849
14e50e57
DM
/* Input/output handler for blackhole dst entries: free the packet, report success. */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
855
856int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
857{
858 struct rt6_info *ort = (struct rt6_info *) *dstp;
859 struct rt6_info *rt = (struct rt6_info *)
860 dst_alloc(&ip6_dst_blackhole_ops);
861 struct dst_entry *new = NULL;
862
863 if (rt) {
864 new = &rt->u.dst;
865
866 atomic_set(&new->__refcnt, 1);
867 new->__use = 1;
868 new->input = ip6_blackhole_output;
869 new->output = ip6_blackhole_output;
870
871 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
872 new->dev = ort->u.dst.dev;
873 if (new->dev)
874 dev_hold(new->dev);
875 rt->rt6i_idev = ort->rt6i_idev;
876 if (rt->rt6i_idev)
877 in6_dev_hold(rt->rt6i_idev);
878 rt->rt6i_expires = 0;
879
880 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
881 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
882 rt->rt6i_metric = 0;
883
884 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
885#ifdef CONFIG_IPV6_SUBTREES
886 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
887#endif
888
889 dst_free(new);
890 }
891
892 dst_release(*dstp);
893 *dstp = new;
894 return (new ? 0 : -ENOMEM);
895}
896EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
897
1da177e4
LT
898/*
899 * Destination cache support functions
900 */
901
902static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
903{
904 struct rt6_info *rt;
905
906 rt = (struct rt6_info *) dst;
907
908 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
909 return dst;
910
911 return NULL;
912}
913
914static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
915{
916 struct rt6_info *rt = (struct rt6_info *) dst;
917
918 if (rt) {
919 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 920 ip6_del_rt(rt);
1da177e4
LT
921 else
922 dst_release(dst);
923 }
924 return NULL;
925}
926
927static void ip6_link_failure(struct sk_buff *skb)
928{
929 struct rt6_info *rt;
930
931 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
932
933 rt = (struct rt6_info *) skb->dst;
934 if (rt) {
935 if (rt->rt6i_flags&RTF_CACHE) {
936 dst_set_expires(&rt->u.dst, 0);
937 rt->rt6i_flags |= RTF_EXPIRES;
938 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
939 rt->rt6i_node->fn_sernum = -1;
940 }
941}
942
943static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
944{
945 struct rt6_info *rt6 = (struct rt6_info*)dst;
946
947 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
948 rt6->rt6i_flags |= RTF_MODIFIED;
949 if (mtu < IPV6_MIN_MTU) {
950 mtu = IPV6_MIN_MTU;
951 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
952 }
953 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 954 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
955 }
956}
957
1da177e4
LT
958static int ipv6_get_mtu(struct net_device *dev);
959
960static inline unsigned int ipv6_advmss(unsigned int mtu)
961{
962 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
963
964 if (mtu < ip6_rt_min_advmss)
965 mtu = ip6_rt_min_advmss;
966
967 /*
1ab1457c
YH
968 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
969 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
970 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
971 * rely only on pmtu discovery"
972 */
973 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
974 mtu = IPV6_MAXPLEN;
975 return mtu;
976}
977
/* List of dst entries created by ndisc_dst_alloc(), linked through dst->next. */
static struct dst_entry *ndisc_dst_gc_list;
/* Protects ndisc_dst_gc_list. */
static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 980
1ab1457c 981struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
1da177e4
LT
982 struct neighbour *neigh,
983 struct in6_addr *addr,
984 int (*output)(struct sk_buff *))
985{
986 struct rt6_info *rt;
987 struct inet6_dev *idev = in6_dev_get(dev);
988
989 if (unlikely(idev == NULL))
990 return NULL;
991
992 rt = ip6_dst_alloc();
993 if (unlikely(rt == NULL)) {
994 in6_dev_put(idev);
995 goto out;
996 }
997
998 dev_hold(dev);
999 if (neigh)
1000 neigh_hold(neigh);
1001 else
1002 neigh = ndisc_get_neigh(dev, addr);
1003
1004 rt->rt6i_dev = dev;
1005 rt->rt6i_idev = idev;
1006 rt->rt6i_nexthop = neigh;
1007 atomic_set(&rt->u.dst.__refcnt, 1);
1008 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1009 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1010 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1011 rt->u.dst.output = output;
1012
1013#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
1014 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1015 ? DST_HOST
1da177e4
LT
1016 : 0;
1017 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1018 rt->rt6i_dst.plen = 128;
1019#endif
1020
5d0bbeeb 1021 spin_lock_bh(&ndisc_lock);
1da177e4
LT
1022 rt->u.dst.next = ndisc_dst_gc_list;
1023 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 1024 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
1025
1026 fib6_force_start_gc();
1027
1028out:
40aa7b90 1029 return &rt->u.dst;
1da177e4
LT
1030}
1031
1032int ndisc_dst_gc(int *more)
1033{
1034 struct dst_entry *dst, *next, **pprev;
1035 int freed;
1036
1037 next = NULL;
1ab1457c 1038 freed = 0;
5d0bbeeb
TG
1039
1040 spin_lock_bh(&ndisc_lock);
1da177e4 1041 pprev = &ndisc_dst_gc_list;
5d0bbeeb 1042
1da177e4
LT
1043 while ((dst = *pprev) != NULL) {
1044 if (!atomic_read(&dst->__refcnt)) {
1045 *pprev = dst->next;
1046 dst_free(dst);
1047 freed++;
1048 } else {
1049 pprev = &dst->next;
1050 (*more)++;
1051 }
1052 }
1053
5d0bbeeb
TG
1054 spin_unlock_bh(&ndisc_lock);
1055
1da177e4
LT
1056 return freed;
1057}
1058
1059static int ip6_dst_gc(void)
1060{
1061 static unsigned expire = 30*HZ;
1062 static unsigned long last_gc;
1063 unsigned long now = jiffies;
1064
1065 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1066 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1067 goto out;
1068
1069 expire++;
1070 fib6_run_gc(expire);
1071 last_gc = now;
1072 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1073 expire = ip6_rt_gc_timeout>>1;
1074
1075out:
1076 expire -= expire>>ip6_rt_gc_elasticity;
1077 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1078}
1079
1080/* Clean host part of a prefix. Not necessary in radix tree,
1081 but results in cleaner routing tables.
1082
1083 Remove it only when all the things will work!
1084 */
1085
1086static int ipv6_get_mtu(struct net_device *dev)
1087{
1088 int mtu = IPV6_MIN_MTU;
1089 struct inet6_dev *idev;
1090
1091 idev = in6_dev_get(dev);
1092 if (idev) {
1093 mtu = idev->cnf.mtu6;
1094 in6_dev_put(idev);
1095 }
1096 return mtu;
1097}
1098
1099int ipv6_get_hoplimit(struct net_device *dev)
1100{
1101 int hoplimit = ipv6_devconf.hop_limit;
1102 struct inet6_dev *idev;
1103
1104 idev = in6_dev_get(dev);
1105 if (idev) {
1106 hoplimit = idev->cnf.hop_limit;
1107 in6_dev_put(idev);
1108 }
1109 return hoplimit;
1110}
1111
1112/*
1113 *
1114 */
1115
86872cb5 1116int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1117{
1118 int err;
1da177e4
LT
1119 struct rt6_info *rt = NULL;
1120 struct net_device *dev = NULL;
1121 struct inet6_dev *idev = NULL;
c71099ac 1122 struct fib6_table *table;
1da177e4
LT
1123 int addr_type;
1124
86872cb5 1125 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1126 return -EINVAL;
1127#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1128 if (cfg->fc_src_len)
1da177e4
LT
1129 return -EINVAL;
1130#endif
86872cb5 1131 if (cfg->fc_ifindex) {
1da177e4 1132 err = -ENODEV;
881d966b 1133 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1da177e4
LT
1134 if (!dev)
1135 goto out;
1136 idev = in6_dev_get(dev);
1137 if (!idev)
1138 goto out;
1139 }
1140
86872cb5
TG
1141 if (cfg->fc_metric == 0)
1142 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1143
86872cb5 1144 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1145 if (table == NULL) {
1146 err = -ENOBUFS;
1147 goto out;
1148 }
1149
1da177e4
LT
1150 rt = ip6_dst_alloc();
1151
1152 if (rt == NULL) {
1153 err = -ENOMEM;
1154 goto out;
1155 }
1156
1157 rt->u.dst.obsolete = -1;
86872cb5 1158 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1159
86872cb5
TG
1160 if (cfg->fc_protocol == RTPROT_UNSPEC)
1161 cfg->fc_protocol = RTPROT_BOOT;
1162 rt->rt6i_protocol = cfg->fc_protocol;
1163
1164 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1165
1166 if (addr_type & IPV6_ADDR_MULTICAST)
1167 rt->u.dst.input = ip6_mc_input;
1168 else
1169 rt->u.dst.input = ip6_forward;
1170
1171 rt->u.dst.output = ip6_output;
1172
86872cb5
TG
1173 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1174 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1175 if (rt->rt6i_dst.plen == 128)
1176 rt->u.dst.flags = DST_HOST;
1177
1178#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1179 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1180 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1181#endif
1182
86872cb5 1183 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1184
1185 /* We cannot add true routes via loopback here,
1186 they would result in kernel looping; promote them to reject routes
1187 */
86872cb5 1188 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1189 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1190 /* hold loopback dev/idev if we haven't done so. */
1191 if (dev != &loopback_dev) {
1192 if (dev) {
1193 dev_put(dev);
1194 in6_dev_put(idev);
1195 }
1196 dev = &loopback_dev;
1197 dev_hold(dev);
1198 idev = in6_dev_get(dev);
1199 if (!idev) {
1200 err = -ENODEV;
1201 goto out;
1202 }
1203 }
1204 rt->u.dst.output = ip6_pkt_discard_out;
1205 rt->u.dst.input = ip6_pkt_discard;
1206 rt->u.dst.error = -ENETUNREACH;
1207 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1208 goto install_route;
1209 }
1210
86872cb5 1211 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1212 struct in6_addr *gw_addr;
1213 int gwa_type;
1214
86872cb5
TG
1215 gw_addr = &cfg->fc_gateway;
1216 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1217 gwa_type = ipv6_addr_type(gw_addr);
1218
1219 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1220 struct rt6_info *grt;
1221
1222 /* IPv6 strictly inhibits using not link-local
1223 addresses as nexthop address.
1224 Otherwise, router will not able to send redirects.
1225 It is very good, but in some (rare!) circumstances
1226 (SIT, PtP, NBMA NOARP links) it is handy to allow
1227 some exceptions. --ANK
1228 */
1229 err = -EINVAL;
1230 if (!(gwa_type&IPV6_ADDR_UNICAST))
1231 goto out;
1232
86872cb5 1233 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1234
1235 err = -EHOSTUNREACH;
1236 if (grt == NULL)
1237 goto out;
1238 if (dev) {
1239 if (dev != grt->rt6i_dev) {
1240 dst_release(&grt->u.dst);
1241 goto out;
1242 }
1243 } else {
1244 dev = grt->rt6i_dev;
1245 idev = grt->rt6i_idev;
1246 dev_hold(dev);
1247 in6_dev_hold(grt->rt6i_idev);
1248 }
1249 if (!(grt->rt6i_flags&RTF_GATEWAY))
1250 err = 0;
1251 dst_release(&grt->u.dst);
1252
1253 if (err)
1254 goto out;
1255 }
1256 err = -EINVAL;
1257 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1258 goto out;
1259 }
1260
1261 err = -ENODEV;
1262 if (dev == NULL)
1263 goto out;
1264
86872cb5 1265 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1266 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1267 if (IS_ERR(rt->rt6i_nexthop)) {
1268 err = PTR_ERR(rt->rt6i_nexthop);
1269 rt->rt6i_nexthop = NULL;
1270 goto out;
1271 }
1272 }
1273
86872cb5 1274 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1275
1276install_route:
86872cb5
TG
1277 if (cfg->fc_mx) {
1278 struct nlattr *nla;
1279 int remaining;
1280
1281 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1282 int type = nla->nla_type;
1283
1284 if (type) {
1285 if (type > RTAX_MAX) {
1da177e4
LT
1286 err = -EINVAL;
1287 goto out;
1288 }
86872cb5
TG
1289
1290 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1291 }
1da177e4
LT
1292 }
1293 }
1294
1295 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1296 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1297 if (!rt->u.dst.metrics[RTAX_MTU-1])
1298 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1299 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1300 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1301 rt->u.dst.dev = dev;
1302 rt->rt6i_idev = idev;
c71099ac 1303 rt->rt6i_table = table;
86872cb5 1304 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1305
1306out:
1307 if (dev)
1308 dev_put(dev);
1309 if (idev)
1310 in6_dev_put(idev);
1311 if (rt)
40aa7b90 1312 dst_free(&rt->u.dst);
1da177e4
LT
1313 return err;
1314}
1315
86872cb5 1316static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1317{
1318 int err;
c71099ac 1319 struct fib6_table *table;
1da177e4 1320
6c813a72
PM
1321 if (rt == &ip6_null_entry)
1322 return -ENOENT;
1323
c71099ac
TG
1324 table = rt->rt6i_table;
1325 write_lock_bh(&table->tb6_lock);
1da177e4 1326
86872cb5 1327 err = fib6_del(rt, info);
1da177e4
LT
1328 dst_release(&rt->u.dst);
1329
c71099ac 1330 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1331
1332 return err;
1333}
1334
e0a1ad73
TG
1335int ip6_del_rt(struct rt6_info *rt)
1336{
86872cb5 1337 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1338}
1339
86872cb5 1340static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1341{
c71099ac 1342 struct fib6_table *table;
1da177e4
LT
1343 struct fib6_node *fn;
1344 struct rt6_info *rt;
1345 int err = -ESRCH;
1346
86872cb5 1347 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1348 if (table == NULL)
1349 return err;
1350
1351 read_lock_bh(&table->tb6_lock);
1da177e4 1352
c71099ac 1353 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1354 &cfg->fc_dst, cfg->fc_dst_len,
1355 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1356
1da177e4 1357 if (fn) {
7cc48263 1358 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1359 if (cfg->fc_ifindex &&
1da177e4 1360 (rt->rt6i_dev == NULL ||
86872cb5 1361 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1362 continue;
86872cb5
TG
1363 if (cfg->fc_flags & RTF_GATEWAY &&
1364 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1365 continue;
86872cb5 1366 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1367 continue;
1368 dst_hold(&rt->u.dst);
c71099ac 1369 read_unlock_bh(&table->tb6_lock);
1da177e4 1370
86872cb5 1371 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1372 }
1373 }
c71099ac 1374 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1375
1376 return err;
1377}
1378
1379/*
1380 * Handle redirects
1381 */
a6279458
YH
1382struct ip6rd_flowi {
1383 struct flowi fl;
1384 struct in6_addr gateway;
1385};
1386
1387static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1388 struct flowi *fl,
1389 int flags)
1da177e4 1390{
a6279458
YH
1391 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1392 struct rt6_info *rt;
e843b9e1 1393 struct fib6_node *fn;
c71099ac 1394
1da177e4 1395 /*
e843b9e1
YH
1396 * Get the "current" route for this destination and
1397 * check if the redirect has come from approriate router.
1398 *
1399 * RFC 2461 specifies that redirects should only be
1400 * accepted if they come from the nexthop to the target.
1401 * Due to the way the routes are chosen, this notion
1402 * is a bit fuzzy and one might need to check all possible
1403 * routes.
1da177e4 1404 */
1da177e4 1405
c71099ac 1406 read_lock_bh(&table->tb6_lock);
a6279458 1407 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1408restart:
7cc48263 1409 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1410 /*
1411 * Current route is on-link; redirect is always invalid.
1412 *
1413 * Seems, previous statement is not true. It could
1414 * be node, which looks for us as on-link (f.e. proxy ndisc)
1415 * But then router serving it might decide, that we should
1416 * know truth 8)8) --ANK (980726).
1417 */
1418 if (rt6_check_expired(rt))
1419 continue;
1420 if (!(rt->rt6i_flags & RTF_GATEWAY))
1421 continue;
a6279458 1422 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1423 continue;
a6279458 1424 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1425 continue;
1426 break;
1427 }
a6279458 1428
cb15d9c2 1429 if (!rt)
a6279458 1430 rt = &ip6_null_entry;
cb15d9c2
YH
1431 BACKTRACK(&fl->fl6_src);
1432out:
a6279458
YH
1433 dst_hold(&rt->u.dst);
1434
c71099ac 1435 read_unlock_bh(&table->tb6_lock);
e843b9e1 1436
a6279458
YH
1437 return rt;
1438};
1439
1440static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1441 struct in6_addr *src,
1442 struct in6_addr *gateway,
1443 struct net_device *dev)
1444{
adaa70bb 1445 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1446 struct ip6rd_flowi rdfl = {
1447 .fl = {
1448 .oif = dev->ifindex,
1449 .nl_u = {
1450 .ip6_u = {
1451 .daddr = *dest,
1452 .saddr = *src,
1453 },
1454 },
1455 },
1456 .gateway = *gateway,
1457 };
adaa70bb
TG
1458
1459 if (rt6_need_strict(dest))
1460 flags |= RT6_LOOKUP_F_IFACE;
a6279458
YH
1461
1462 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1463}
1464
1465void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1466 struct in6_addr *saddr,
1467 struct neighbour *neigh, u8 *lladdr, int on_link)
1468{
1469 struct rt6_info *rt, *nrt = NULL;
1470 struct netevent_redirect netevent;
1471
1472 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1473
1474 if (rt == &ip6_null_entry) {
1da177e4
LT
1475 if (net_ratelimit())
1476 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1477 "for redirect target\n");
a6279458 1478 goto out;
1da177e4
LT
1479 }
1480
1da177e4
LT
1481 /*
1482 * We have finally decided to accept it.
1483 */
1484
1ab1457c 1485 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1486 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1487 NEIGH_UPDATE_F_OVERRIDE|
1488 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1489 NEIGH_UPDATE_F_ISROUTER))
1490 );
1491
1492 /*
1493 * Redirect received -> path was valid.
1494 * Look, redirects are sent only in response to data packets,
1495 * so that this nexthop apparently is reachable. --ANK
1496 */
1497 dst_confirm(&rt->u.dst);
1498
1499 /* Duplicate redirect: silently ignore. */
1500 if (neigh == rt->u.dst.neighbour)
1501 goto out;
1502
1503 nrt = ip6_rt_copy(rt);
1504 if (nrt == NULL)
1505 goto out;
1506
1507 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1508 if (on_link)
1509 nrt->rt6i_flags &= ~RTF_GATEWAY;
1510
1511 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1512 nrt->rt6i_dst.plen = 128;
1513 nrt->u.dst.flags |= DST_HOST;
1514
1515 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1516 nrt->rt6i_nexthop = neigh_clone(neigh);
1517 /* Reset pmtu, it may be better */
1518 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1519 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1520
40e22e8f 1521 if (ip6_ins_rt(nrt))
1da177e4
LT
1522 goto out;
1523
8d71740c
TT
1524 netevent.old = &rt->u.dst;
1525 netevent.new = &nrt->u.dst;
1526 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1527
1da177e4 1528 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1529 ip6_del_rt(rt);
1da177e4
LT
1530 return;
1531 }
1532
1533out:
1ab1457c 1534 dst_release(&rt->u.dst);
1da177e4
LT
1535 return;
1536}
1537
1538/*
1539 * Handle ICMP "packet too big" messages
1540 * i.e. Path MTU discovery
1541 */
1542
1543void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1544 struct net_device *dev, u32 pmtu)
1545{
1546 struct rt6_info *rt, *nrt;
1547 int allfrag = 0;
1548
1549 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1550 if (rt == NULL)
1551 return;
1552
1553 if (pmtu >= dst_mtu(&rt->u.dst))
1554 goto out;
1555
1556 if (pmtu < IPV6_MIN_MTU) {
1557 /*
1ab1457c 1558 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1559 * MTU (1280) and a fragment header should always be included
1560 * after a node receiving Too Big message reporting PMTU is
1561 * less than the IPv6 Minimum Link MTU.
1562 */
1563 pmtu = IPV6_MIN_MTU;
1564 allfrag = 1;
1565 }
1566
1567 /* New mtu received -> path was valid.
1568 They are sent only in response to data packets,
1569 so that this nexthop apparently is reachable. --ANK
1570 */
1571 dst_confirm(&rt->u.dst);
1572
1573 /* Host route. If it is static, it would be better
1574 not to override it, but add new one, so that
1575 when cache entry will expire old pmtu
1576 would return automatically.
1577 */
1578 if (rt->rt6i_flags & RTF_CACHE) {
1579 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1580 if (allfrag)
1581 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1582 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1583 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1584 goto out;
1585 }
1586
1587 /* Network route.
1588 Two cases are possible:
1589 1. It is connected route. Action: COW
1590 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1591 */
d5315b50 1592 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1593 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1594 else
1595 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1596
d5315b50 1597 if (nrt) {
a1e78363
YH
1598 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1599 if (allfrag)
1600 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1601
1602 /* According to RFC 1981, detecting PMTU increase shouldn't be
1603 * happened within 5 mins, the recommended timer is 10 mins.
1604 * Here this route expiration time is set to ip6_rt_mtu_expires
1605 * which is 10 mins. After 10 mins the decreased pmtu is expired
1606 * and detecting PMTU increase will be automatically happened.
1607 */
1608 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1609 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1610
40e22e8f 1611 ip6_ins_rt(nrt);
1da177e4 1612 }
1da177e4
LT
1613out:
1614 dst_release(&rt->u.dst);
1615}
1616
1617/*
1618 * Misc support functions
1619 */
1620
1621static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1622{
1623 struct rt6_info *rt = ip6_dst_alloc();
1624
1625 if (rt) {
1626 rt->u.dst.input = ort->u.dst.input;
1627 rt->u.dst.output = ort->u.dst.output;
1628
1629 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1630 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1631 rt->u.dst.dev = ort->u.dst.dev;
1632 if (rt->u.dst.dev)
1633 dev_hold(rt->u.dst.dev);
1634 rt->rt6i_idev = ort->rt6i_idev;
1635 if (rt->rt6i_idev)
1636 in6_dev_hold(rt->rt6i_idev);
1637 rt->u.dst.lastuse = jiffies;
1638 rt->rt6i_expires = 0;
1639
1640 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1641 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1642 rt->rt6i_metric = 0;
1643
1644 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1645#ifdef CONFIG_IPV6_SUBTREES
1646 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1647#endif
c71099ac 1648 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1649 }
1650 return rt;
1651}
1652
70ceb4f5
YH
1653#ifdef CONFIG_IPV6_ROUTE_INFO
1654static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1655 struct in6_addr *gwaddr, int ifindex)
1656{
1657 struct fib6_node *fn;
1658 struct rt6_info *rt = NULL;
c71099ac
TG
1659 struct fib6_table *table;
1660
1661 table = fib6_get_table(RT6_TABLE_INFO);
1662 if (table == NULL)
1663 return NULL;
70ceb4f5 1664
c71099ac
TG
1665 write_lock_bh(&table->tb6_lock);
1666 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1667 if (!fn)
1668 goto out;
1669
7cc48263 1670 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1671 if (rt->rt6i_dev->ifindex != ifindex)
1672 continue;
1673 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1674 continue;
1675 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1676 continue;
1677 dst_hold(&rt->u.dst);
1678 break;
1679 }
1680out:
c71099ac 1681 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1682 return rt;
1683}
1684
1685static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1686 struct in6_addr *gwaddr, int ifindex,
1687 unsigned pref)
1688{
86872cb5
TG
1689 struct fib6_config cfg = {
1690 .fc_table = RT6_TABLE_INFO,
1691 .fc_metric = 1024,
1692 .fc_ifindex = ifindex,
1693 .fc_dst_len = prefixlen,
1694 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1695 RTF_UP | RTF_PREF(pref),
1696 };
1697
1698 ipv6_addr_copy(&cfg.fc_dst, prefix);
1699 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1700
e317da96
YH
1701 /* We should treat it as a default route if prefix length is 0. */
1702 if (!prefixlen)
86872cb5 1703 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1704
86872cb5 1705 ip6_route_add(&cfg);
70ceb4f5
YH
1706
1707 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1708}
1709#endif
1710
1da177e4 1711struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1712{
1da177e4 1713 struct rt6_info *rt;
c71099ac 1714 struct fib6_table *table;
1da177e4 1715
c71099ac
TG
1716 table = fib6_get_table(RT6_TABLE_DFLT);
1717 if (table == NULL)
1718 return NULL;
1da177e4 1719
c71099ac 1720 write_lock_bh(&table->tb6_lock);
7cc48263 1721 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1722 if (dev == rt->rt6i_dev &&
045927ff 1723 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1724 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1725 break;
1726 }
1727 if (rt)
1728 dst_hold(&rt->u.dst);
c71099ac 1729 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1730 return rt;
1731}
1732
1733struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1734 struct net_device *dev,
1735 unsigned int pref)
1da177e4 1736{
86872cb5
TG
1737 struct fib6_config cfg = {
1738 .fc_table = RT6_TABLE_DFLT,
1739 .fc_metric = 1024,
1740 .fc_ifindex = dev->ifindex,
1741 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1742 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1743 };
1da177e4 1744
86872cb5 1745 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1746
86872cb5 1747 ip6_route_add(&cfg);
1da177e4 1748
1da177e4
LT
1749 return rt6_get_dflt_router(gwaddr, dev);
1750}
1751
1752void rt6_purge_dflt_routers(void)
1753{
1754 struct rt6_info *rt;
c71099ac
TG
1755 struct fib6_table *table;
1756
1757 /* NOTE: Keep consistent with rt6_get_dflt_router */
1758 table = fib6_get_table(RT6_TABLE_DFLT);
1759 if (table == NULL)
1760 return;
1da177e4
LT
1761
1762restart:
c71099ac 1763 read_lock_bh(&table->tb6_lock);
7cc48263 1764 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1765 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1766 dst_hold(&rt->u.dst);
c71099ac 1767 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1768 ip6_del_rt(rt);
1da177e4
LT
1769 goto restart;
1770 }
1771 }
c71099ac 1772 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1773}
1774
86872cb5
TG
1775static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1776 struct fib6_config *cfg)
1777{
1778 memset(cfg, 0, sizeof(*cfg));
1779
1780 cfg->fc_table = RT6_TABLE_MAIN;
1781 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1782 cfg->fc_metric = rtmsg->rtmsg_metric;
1783 cfg->fc_expires = rtmsg->rtmsg_info;
1784 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1785 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1786 cfg->fc_flags = rtmsg->rtmsg_flags;
1787
1788 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1789 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1790 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1791}
1792
1da177e4
LT
1793int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1794{
86872cb5 1795 struct fib6_config cfg;
1da177e4
LT
1796 struct in6_rtmsg rtmsg;
1797 int err;
1798
1799 switch(cmd) {
1800 case SIOCADDRT: /* Add a route */
1801 case SIOCDELRT: /* Delete a route */
1802 if (!capable(CAP_NET_ADMIN))
1803 return -EPERM;
1804 err = copy_from_user(&rtmsg, arg,
1805 sizeof(struct in6_rtmsg));
1806 if (err)
1807 return -EFAULT;
86872cb5
TG
1808
1809 rtmsg_to_fib6_config(&rtmsg, &cfg);
1810
1da177e4
LT
1811 rtnl_lock();
1812 switch (cmd) {
1813 case SIOCADDRT:
86872cb5 1814 err = ip6_route_add(&cfg);
1da177e4
LT
1815 break;
1816 case SIOCDELRT:
86872cb5 1817 err = ip6_route_del(&cfg);
1da177e4
LT
1818 break;
1819 default:
1820 err = -EINVAL;
1821 }
1822 rtnl_unlock();
1823
1824 return err;
3ff50b79 1825 }
1da177e4
LT
1826
1827 return -EINVAL;
1828}
1829
1830/*
1831 * Drop the packet on the floor
1832 */
1833
612f09e8
YH
1834static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1835 int ipstats_mib_noroutes)
1da177e4 1836{
612f09e8
YH
1837 int type;
1838 switch (ipstats_mib_noroutes) {
1839 case IPSTATS_MIB_INNOROUTES:
0660e03f 1840 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1841 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1842 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1843 break;
1844 }
1845 /* FALLTHROUGH */
1846 case IPSTATS_MIB_OUTNOROUTES:
1847 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1848 break;
1849 }
9ce8ade0 1850 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1851 kfree_skb(skb);
1852 return 0;
1853}
1854
9ce8ade0
TG
1855static int ip6_pkt_discard(struct sk_buff *skb)
1856{
612f09e8 1857 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1858}
1859
20380731 1860static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1861{
1862 skb->dev = skb->dst->dev;
612f09e8 1863 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1864}
1865
6723ab54
DM
1866#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1867
9ce8ade0
TG
1868static int ip6_pkt_prohibit(struct sk_buff *skb)
1869{
612f09e8 1870 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1871}
1872
1873static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1874{
1875 skb->dev = skb->dst->dev;
612f09e8 1876 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1877}
1878
/* Free the packet without sending any ICMP or touching counters; returns 0. */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	int ret = 0;

	kfree_skb(skb);
	return ret;
}
1884
6723ab54
DM
1885#endif
1886
1da177e4
LT
1887/*
1888 * Allocate a dst for local (unicast / anycast) address.
1889 */
1890
1891struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1892 const struct in6_addr *addr,
1893 int anycast)
1894{
1895 struct rt6_info *rt = ip6_dst_alloc();
1896
1897 if (rt == NULL)
1898 return ERR_PTR(-ENOMEM);
1899
1900 dev_hold(&loopback_dev);
1901 in6_dev_hold(idev);
1902
1903 rt->u.dst.flags = DST_HOST;
1904 rt->u.dst.input = ip6_input;
1905 rt->u.dst.output = ip6_output;
1906 rt->rt6i_dev = &loopback_dev;
1907 rt->rt6i_idev = idev;
1908 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1909 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1910 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1911 rt->u.dst.obsolete = -1;
1912
1913 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1914 if (anycast)
1915 rt->rt6i_flags |= RTF_ANYCAST;
1916 else
1da177e4
LT
1917 rt->rt6i_flags |= RTF_LOCAL;
1918 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1919 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1920 dst_free(&rt->u.dst);
1da177e4
LT
1921 return ERR_PTR(-ENOMEM);
1922 }
1923
1924 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1925 rt->rt6i_dst.plen = 128;
c71099ac 1926 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1927
1928 atomic_set(&rt->u.dst.__refcnt, 1);
1929
1930 return rt;
1931}
1932
1933static int fib6_ifdown(struct rt6_info *rt, void *arg)
1934{
1935 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1936 rt != &ip6_null_entry) {
1937 RT6_TRACE("deleted by ifdown %p\n", rt);
1938 return -1;
1939 }
1940 return 0;
1941}
1942
1943void rt6_ifdown(struct net_device *dev)
1944{
c71099ac 1945 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1946}
1947
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
1953
1954static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1955{
1956 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1957 struct inet6_dev *idev;
1958
1959 /* In IPv6 pmtu discovery is not optional,
1960 so that RTAX_MTU lock cannot disable it.
1961 We still use this lock to block changes
1962 caused by addrconf/ndisc.
1963 */
1964
1965 idev = __in6_dev_get(arg->dev);
1966 if (idev == NULL)
1967 return 0;
1968
1969 /* For administrative MTU increase, there is no way to discover
1970 IPv6 PMTU increase, so PMTU increase should be updated here.
1971 Since RFC 1981 doesn't include administrative MTU increase
1972 update PMTU increase is a MUST. (i.e. jumbo frame)
1973 */
1974 /*
1975 If new MTU is less than route PMTU, this new MTU will be the
1976 lowest MTU in the path, update the route PMTU to reflect PMTU
1977 decreases; if new MTU is greater than route PMTU, and the
1978 old MTU is the lowest MTU in the path, update the route PMTU
1979 to reflect the increase. In this case if the other nodes' MTU
1980 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1981 PMTU discouvery.
1982 */
1983 if (rt->rt6i_dev == arg->dev &&
1984 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1ab1457c
YH
1985 (dst_mtu(&rt->u.dst) > arg->mtu ||
1986 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1987 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1988 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
566cfd8f
SA
1989 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1990 }
1da177e4
LT
1991 return 0;
1992}
1993
1994void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1995{
c71099ac
TG
1996 struct rt6_mtu_change_arg arg = {
1997 .dev = dev,
1998 .mtu = mtu,
1999 };
1da177e4 2000
c71099ac 2001 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2002}
2003
ef7c79ed 2004static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2005 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2006 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2007 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2008 [RTA_PRIORITY] = { .type = NLA_U32 },
2009 [RTA_METRICS] = { .type = NLA_NESTED },
2010};
2011
2012static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2013 struct fib6_config *cfg)
1da177e4 2014{
86872cb5
TG
2015 struct rtmsg *rtm;
2016 struct nlattr *tb[RTA_MAX+1];
2017 int err;
1da177e4 2018
86872cb5
TG
2019 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2020 if (err < 0)
2021 goto errout;
1da177e4 2022
86872cb5
TG
2023 err = -EINVAL;
2024 rtm = nlmsg_data(nlh);
2025 memset(cfg, 0, sizeof(*cfg));
2026
2027 cfg->fc_table = rtm->rtm_table;
2028 cfg->fc_dst_len = rtm->rtm_dst_len;
2029 cfg->fc_src_len = rtm->rtm_src_len;
2030 cfg->fc_flags = RTF_UP;
2031 cfg->fc_protocol = rtm->rtm_protocol;
2032
2033 if (rtm->rtm_type == RTN_UNREACHABLE)
2034 cfg->fc_flags |= RTF_REJECT;
2035
2036 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2037 cfg->fc_nlinfo.nlh = nlh;
2038
2039 if (tb[RTA_GATEWAY]) {
2040 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2041 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2042 }
86872cb5
TG
2043
2044 if (tb[RTA_DST]) {
2045 int plen = (rtm->rtm_dst_len + 7) >> 3;
2046
2047 if (nla_len(tb[RTA_DST]) < plen)
2048 goto errout;
2049
2050 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2051 }
86872cb5
TG
2052
2053 if (tb[RTA_SRC]) {
2054 int plen = (rtm->rtm_src_len + 7) >> 3;
2055
2056 if (nla_len(tb[RTA_SRC]) < plen)
2057 goto errout;
2058
2059 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2060 }
86872cb5
TG
2061
2062 if (tb[RTA_OIF])
2063 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2064
2065 if (tb[RTA_PRIORITY])
2066 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2067
2068 if (tb[RTA_METRICS]) {
2069 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2070 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2071 }
86872cb5
TG
2072
2073 if (tb[RTA_TABLE])
2074 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2075
2076 err = 0;
2077errout:
2078 return err;
1da177e4
LT
2079}
2080
c127ea2c 2081static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2082{
86872cb5
TG
2083 struct fib6_config cfg;
2084 int err;
1da177e4 2085
86872cb5
TG
2086 err = rtm_to_fib6_config(skb, nlh, &cfg);
2087 if (err < 0)
2088 return err;
2089
2090 return ip6_route_del(&cfg);
1da177e4
LT
2091}
2092
c127ea2c 2093static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2094{
86872cb5
TG
2095 struct fib6_config cfg;
2096 int err;
1da177e4 2097
86872cb5
TG
2098 err = rtm_to_fib6_config(skb, nlh, &cfg);
2099 if (err < 0)
2100 return err;
2101
2102 return ip6_route_add(&cfg);
1da177e4
LT
2103}
2104
339bf98f
TG
2105static inline size_t rt6_nlmsg_size(void)
2106{
2107 return NLMSG_ALIGN(sizeof(struct rtmsg))
2108 + nla_total_size(16) /* RTA_SRC */
2109 + nla_total_size(16) /* RTA_DST */
2110 + nla_total_size(16) /* RTA_GATEWAY */
2111 + nla_total_size(16) /* RTA_PREFSRC */
2112 + nla_total_size(4) /* RTA_TABLE */
2113 + nla_total_size(4) /* RTA_IIF */
2114 + nla_total_size(4) /* RTA_OIF */
2115 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2116 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2117 + nla_total_size(sizeof(struct rta_cacheinfo));
2118}
2119
1da177e4 2120static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2121 struct in6_addr *dst, struct in6_addr *src,
2122 int iif, int type, u32 pid, u32 seq,
2123 int prefix, unsigned int flags)
1da177e4
LT
2124{
2125 struct rtmsg *rtm;
2d7202bf 2126 struct nlmsghdr *nlh;
e3703b3d 2127 long expires;
9e762a4a 2128 u32 table;
1da177e4
LT
2129
2130 if (prefix) { /* user wants prefix routes only */
2131 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2132 /* success since this is not a prefix route */
2133 return 1;
2134 }
2135 }
2136
2d7202bf
TG
2137 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2138 if (nlh == NULL)
26932566 2139 return -EMSGSIZE;
2d7202bf
TG
2140
2141 rtm = nlmsg_data(nlh);
1da177e4
LT
2142 rtm->rtm_family = AF_INET6;
2143 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2144 rtm->rtm_src_len = rt->rt6i_src.plen;
2145 rtm->rtm_tos = 0;
c71099ac 2146 if (rt->rt6i_table)
9e762a4a 2147 table = rt->rt6i_table->tb6_id;
c71099ac 2148 else
9e762a4a
PM
2149 table = RT6_TABLE_UNSPEC;
2150 rtm->rtm_table = table;
2d7202bf 2151 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2152 if (rt->rt6i_flags&RTF_REJECT)
2153 rtm->rtm_type = RTN_UNREACHABLE;
2154 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2155 rtm->rtm_type = RTN_LOCAL;
2156 else
2157 rtm->rtm_type = RTN_UNICAST;
2158 rtm->rtm_flags = 0;
2159 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2160 rtm->rtm_protocol = rt->rt6i_protocol;
2161 if (rt->rt6i_flags&RTF_DYNAMIC)
2162 rtm->rtm_protocol = RTPROT_REDIRECT;
2163 else if (rt->rt6i_flags & RTF_ADDRCONF)
2164 rtm->rtm_protocol = RTPROT_KERNEL;
2165 else if (rt->rt6i_flags&RTF_DEFAULT)
2166 rtm->rtm_protocol = RTPROT_RA;
2167
2168 if (rt->rt6i_flags&RTF_CACHE)
2169 rtm->rtm_flags |= RTM_F_CLONED;
2170
2171 if (dst) {
2d7202bf 2172 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2173 rtm->rtm_dst_len = 128;
1da177e4 2174 } else if (rtm->rtm_dst_len)
2d7202bf 2175 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2176#ifdef CONFIG_IPV6_SUBTREES
2177 if (src) {
2d7202bf 2178 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2179 rtm->rtm_src_len = 128;
1da177e4 2180 } else if (rtm->rtm_src_len)
2d7202bf 2181 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2182#endif
2183 if (iif)
2d7202bf 2184 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2185 else if (dst) {
2186 struct in6_addr saddr_buf;
2187 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2188 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2189 }
2d7202bf 2190
1da177e4 2191 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2192 goto nla_put_failure;
2193
1da177e4 2194 if (rt->u.dst.neighbour)
2d7202bf
TG
2195 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2196
1da177e4 2197 if (rt->u.dst.dev)
2d7202bf
TG
2198 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2199
2200 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2201
2202 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2203 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2204 expires, rt->u.dst.error) < 0)
2205 goto nla_put_failure;
2d7202bf
TG
2206
2207 return nlmsg_end(skb, nlh);
2208
2209nla_put_failure:
26932566
PM
2210 nlmsg_cancel(skb, nlh);
2211 return -EMSGSIZE;
1da177e4
LT
2212}
2213
1b43af54 2214int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2215{
2216 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2217 int prefix;
2218
2d7202bf
TG
2219 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2220 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2221 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2222 } else
2223 prefix = 0;
2224
2225 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2226 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2227 prefix, NLM_F_MULTI);
1da177e4
LT
2228}
2229
c127ea2c 2230static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2231{
ab364a6f
TG
2232 struct nlattr *tb[RTA_MAX+1];
2233 struct rt6_info *rt;
1da177e4 2234 struct sk_buff *skb;
ab364a6f 2235 struct rtmsg *rtm;
1da177e4 2236 struct flowi fl;
ab364a6f 2237 int err, iif = 0;
1da177e4 2238
ab364a6f
TG
2239 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2240 if (err < 0)
2241 goto errout;
1da177e4 2242
ab364a6f 2243 err = -EINVAL;
1da177e4 2244 memset(&fl, 0, sizeof(fl));
1da177e4 2245
ab364a6f
TG
2246 if (tb[RTA_SRC]) {
2247 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2248 goto errout;
2249
2250 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2251 }
2252
2253 if (tb[RTA_DST]) {
2254 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2255 goto errout;
2256
2257 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2258 }
2259
2260 if (tb[RTA_IIF])
2261 iif = nla_get_u32(tb[RTA_IIF]);
2262
2263 if (tb[RTA_OIF])
2264 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2265
2266 if (iif) {
2267 struct net_device *dev;
881d966b 2268 dev = __dev_get_by_index(&init_net, iif);
1da177e4
LT
2269 if (!dev) {
2270 err = -ENODEV;
ab364a6f 2271 goto errout;
1da177e4
LT
2272 }
2273 }
2274
ab364a6f
TG
2275 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2276 if (skb == NULL) {
2277 err = -ENOBUFS;
2278 goto errout;
2279 }
1da177e4 2280
ab364a6f
TG
2281 /* Reserve room for dummy headers, this skb can pass
2282 through good chunk of routing engine.
2283 */
459a98ed 2284 skb_reset_mac_header(skb);
ab364a6f 2285 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2286
ab364a6f 2287 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2288 skb->dst = &rt->u.dst;
2289
ab364a6f 2290 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2291 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2292 nlh->nlmsg_seq, 0, 0);
1da177e4 2293 if (err < 0) {
ab364a6f
TG
2294 kfree_skb(skb);
2295 goto errout;
1da177e4
LT
2296 }
2297
2942e900 2298 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2299errout:
1da177e4 2300 return err;
1da177e4
LT
2301}
2302
86872cb5 2303void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2304{
2305 struct sk_buff *skb;
86872cb5
TG
2306 u32 pid = 0, seq = 0;
2307 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2308 int err = -ENOBUFS;
2309
86872cb5
TG
2310 if (info) {
2311 pid = info->pid;
2312 nlh = info->nlh;
2313 if (nlh)
2314 seq = nlh->nlmsg_seq;
2315 }
2316
339bf98f 2317 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2318 if (skb == NULL)
2319 goto errout;
2320
2321 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
26932566
PM
2322 if (err < 0) {
2323 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2324 WARN_ON(err == -EMSGSIZE);
2325 kfree_skb(skb);
2326 goto errout;
2327 }
21713ebc
TG
2328 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2329errout:
2330 if (err < 0)
2331 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2332}
2333
2334/*
2335 * /proc
2336 */
2337
2338#ifdef CONFIG_PROC_FS
2339
/*
 * Size used as the per-route record length when translating a /proc
 * read offset into "records to skip" and "bytes into the record".
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer supplied by the reader */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes the reader asked for */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2350
2351static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2352{
2353 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1da177e4
LT
2354
2355 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2356 arg->skip++;
2357 return 0;
2358 }
2359
2360 if (arg->len >= arg->length)
2361 return 0;
2362
33e93c96
YH
2363 arg->len += sprintf(arg->buffer + arg->len,
2364 NIP6_SEQFMT " %02x ",
2365 NIP6(rt->rt6i_dst.addr),
1da177e4
LT
2366 rt->rt6i_dst.plen);
2367
2368#ifdef CONFIG_IPV6_SUBTREES
33e93c96
YH
2369 arg->len += sprintf(arg->buffer + arg->len,
2370 NIP6_SEQFMT " %02x ",
2371 NIP6(rt->rt6i_src.addr),
1da177e4
LT
2372 rt->rt6i_src.plen);
2373#else
33e93c96
YH
2374 arg->len += sprintf(arg->buffer + arg->len,
2375 "00000000000000000000000000000000 00 ");
1da177e4
LT
2376#endif
2377
2378 if (rt->rt6i_nexthop) {
33e93c96
YH
2379 arg->len += sprintf(arg->buffer + arg->len,
2380 NIP6_SEQFMT,
2381 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2382 } else {
33e93c96
YH
2383 arg->len += sprintf(arg->buffer + arg->len,
2384 "00000000000000000000000000000000");
1da177e4
LT
2385 }
2386 arg->len += sprintf(arg->buffer + arg->len,
2387 " %08x %08x %08x %08x %8s\n",
2388 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1ab1457c 2389 rt->u.dst.__use, rt->rt6i_flags,
1da177e4
LT
2390 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2391 return 0;
2392}
2393
2394static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2395{
c71099ac
TG
2396 struct rt6_proc_arg arg = {
2397 .buffer = buffer,
2398 .offset = offset,
2399 .length = length,
2400 };
1da177e4 2401
c71099ac 2402 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2403
2404 *start = buffer;
2405 if (offset)
2406 *start += offset % RT6_INFO_LEN;
2407
2408 arg.len -= offset % RT6_INFO_LEN;
2409
2410 if (arg.len > length)
2411 arg.len = length;
2412 if (arg.len < 0)
2413 arg.len = 0;
2414
2415 return arg.len;
2416}
2417
1da177e4
LT
2418static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2419{
2420 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2421 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2422 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2423 rt6_stats.fib_rt_cache,
2424 atomic_read(&ip6_dst_ops.entries),
2425 rt6_stats.fib_discarded_routes);
2426
2427 return 0;
2428}
2429
2430static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2431{
2432 return single_open(file, rt6_stats_seq_show, NULL);
2433}
2434
9a32144e 2435static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2436 .owner = THIS_MODULE,
2437 .open = rt6_stats_seq_open,
2438 .read = seq_read,
2439 .llseek = seq_lseek,
2440 .release = single_release,
2441};
2442#endif /* CONFIG_PROC_FS */
2443
2444#ifdef CONFIG_SYSCTL
2445
2446static int flush_delay;
2447
2448static
2449int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2450 void __user *buffer, size_t *lenp, loff_t *ppos)
2451{
2452 if (write) {
2453 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2454 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2455 return 0;
2456 } else
2457 return -EINVAL;
2458}
2459
2460ctl_table ipv6_route_table[] = {
1ab1457c
YH
2461 {
2462 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1da177e4 2463 .procname = "flush",
1ab1457c 2464 .data = &flush_delay,
1da177e4 2465 .maxlen = sizeof(int),
89c8b3a1 2466 .mode = 0200,
1ab1457c 2467 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2468 },
2469 {
2470 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2471 .procname = "gc_thresh",
1ab1457c 2472 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2473 .maxlen = sizeof(int),
2474 .mode = 0644,
1ab1457c 2475 .proc_handler = &proc_dointvec,
1da177e4
LT
2476 },
2477 {
2478 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2479 .procname = "max_size",
1ab1457c 2480 .data = &ip6_rt_max_size,
1da177e4
LT
2481 .maxlen = sizeof(int),
2482 .mode = 0644,
1ab1457c 2483 .proc_handler = &proc_dointvec,
1da177e4
LT
2484 },
2485 {
2486 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2487 .procname = "gc_min_interval",
1ab1457c 2488 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2489 .maxlen = sizeof(int),
2490 .mode = 0644,
1ab1457c 2491 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2492 .strategy = &sysctl_jiffies,
2493 },
2494 {
2495 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2496 .procname = "gc_timeout",
1ab1457c 2497 .data = &ip6_rt_gc_timeout,
1da177e4
LT
2498 .maxlen = sizeof(int),
2499 .mode = 0644,
1ab1457c 2500 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2501 .strategy = &sysctl_jiffies,
2502 },
2503 {
2504 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2505 .procname = "gc_interval",
1ab1457c 2506 .data = &ip6_rt_gc_interval,
1da177e4
LT
2507 .maxlen = sizeof(int),
2508 .mode = 0644,
1ab1457c 2509 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2510 .strategy = &sysctl_jiffies,
2511 },
2512 {
2513 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2514 .procname = "gc_elasticity",
1ab1457c 2515 .data = &ip6_rt_gc_elasticity,
1da177e4
LT
2516 .maxlen = sizeof(int),
2517 .mode = 0644,
1ab1457c 2518 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2519 .strategy = &sysctl_jiffies,
2520 },
2521 {
2522 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2523 .procname = "mtu_expires",
1ab1457c 2524 .data = &ip6_rt_mtu_expires,
1da177e4
LT
2525 .maxlen = sizeof(int),
2526 .mode = 0644,
1ab1457c 2527 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2528 .strategy = &sysctl_jiffies,
2529 },
2530 {
2531 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2532 .procname = "min_adv_mss",
1ab1457c 2533 .data = &ip6_rt_min_advmss,
1da177e4
LT
2534 .maxlen = sizeof(int),
2535 .mode = 0644,
1ab1457c 2536 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2537 .strategy = &sysctl_jiffies,
2538 },
2539 {
2540 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2541 .procname = "gc_min_interval_ms",
1ab1457c 2542 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2543 .maxlen = sizeof(int),
2544 .mode = 0644,
1ab1457c 2545 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2546 .strategy = &sysctl_ms_jiffies,
2547 },
2548 { .ctl_name = 0 }
2549};
2550
2551#endif
2552
2553void __init ip6_route_init(void)
2554{
952a10be 2555#ifdef CONFIG_PROC_FS
1da177e4 2556 struct proc_dir_entry *p;
952a10be 2557#endif
e5d679f3
AD
2558 ip6_dst_ops.kmem_cachep =
2559 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
20c2df83 2560 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
14e50e57
DM
2561 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2562
1da177e4
LT
2563 fib6_init();
2564#ifdef CONFIG_PROC_FS
457c4cbc 2565 p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
1da177e4
LT
2566 if (p)
2567 p->owner = THIS_MODULE;
2568
457c4cbc 2569 proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
1da177e4
LT
2570#endif
2571#ifdef CONFIG_XFRM
2572 xfrm6_init();
2573#endif
101367c2
TG
2574#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2575 fib6_rules_init();
2576#endif
c127ea2c
TG
2577
2578 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2579 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2580 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
1da177e4
LT
2581}
2582
2583void ip6_route_cleanup(void)
2584{
101367c2
TG
2585#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2586 fib6_rules_cleanup();
2587#endif
1da177e4 2588#ifdef CONFIG_PROC_FS
457c4cbc
EB
2589 proc_net_remove(&init_net, "ipv6_route");
2590 proc_net_remove(&init_net, "rt6_stats");
1da177e4
LT
2591#endif
2592#ifdef CONFIG_XFRM
2593 xfrm6_fini();
2594#endif
2595 rt6_ifdown(NULL);
2596 fib6_gc_cleanup();
2597 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2598}