]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NETNS][IPV6] rt6_info - make rt6_info accessed as a pointer
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
1da177e4
LT
100static struct dst_ops ip6_dst_ops = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
862b82c6 111 .local_out = ip6_local_out,
1da177e4 112 .entry_size = sizeof(struct rt6_info),
e2422970 113 .entries = ATOMIC_INIT(0),
1da177e4
LT
114};
115
14e50e57
DM
116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
e2422970 127 .entries = ATOMIC_INIT(0),
14e50e57
DM
128};
129
bdb3289f 130static struct rt6_info ip6_null_entry_template = {
1da177e4
LT
131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
1da177e4
LT
135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 .ops = &ip6_dst_ops,
1da177e4
LT
141 }
142 },
143 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
144 .rt6i_metric = ~(u32) 0,
145 .rt6i_ref = ATOMIC_INIT(1),
146};
147
bdb3289f
DL
148struct rt6_info *ip6_null_entry;
149
101367c2
TG
150#ifdef CONFIG_IPV6_MULTIPLE_TABLES
151
6723ab54
DM
152static int ip6_pkt_prohibit(struct sk_buff *skb);
153static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 154
bdb3289f 155struct rt6_info ip6_prohibit_entry_template = {
101367c2
TG
156 .u = {
157 .dst = {
158 .__refcnt = ATOMIC_INIT(1),
159 .__use = 1,
101367c2
TG
160 .obsolete = -1,
161 .error = -EACCES,
162 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
163 .input = ip6_pkt_prohibit,
164 .output = ip6_pkt_prohibit_out,
101367c2 165 .ops = &ip6_dst_ops,
101367c2
TG
166 }
167 },
168 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
169 .rt6i_metric = ~(u32) 0,
170 .rt6i_ref = ATOMIC_INIT(1),
171};
172
bdb3289f
DL
173struct rt6_info *ip6_prohibit_entry;
174
175static struct rt6_info ip6_blk_hole_entry_template = {
101367c2
TG
176 .u = {
177 .dst = {
178 .__refcnt = ATOMIC_INIT(1),
179 .__use = 1,
101367c2
TG
180 .obsolete = -1,
181 .error = -EINVAL,
182 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
183 .input = dst_discard,
184 .output = dst_discard,
101367c2 185 .ops = &ip6_dst_ops,
101367c2
TG
186 }
187 },
188 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
189 .rt6i_metric = ~(u32) 0,
190 .rt6i_ref = ATOMIC_INIT(1),
191};
192
bdb3289f
DL
193struct rt6_info *ip6_blk_hole_entry;
194
101367c2
TG
195#endif
196
1da177e4
LT
197/* allocate dst with ip6_dst_ops */
198static __inline__ struct rt6_info *ip6_dst_alloc(void)
199{
200 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
201}
202
203static void ip6_dst_destroy(struct dst_entry *dst)
204{
205 struct rt6_info *rt = (struct rt6_info *)dst;
206 struct inet6_dev *idev = rt->rt6i_idev;
207
208 if (idev != NULL) {
209 rt->rt6i_idev = NULL;
210 in6_dev_put(idev);
1ab1457c 211 }
1da177e4
LT
212}
213
214static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
215 int how)
216{
217 struct rt6_info *rt = (struct rt6_info *)dst;
218 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6
DL
219 struct net_device *loopback_dev =
220 dev->nd_net->loopback_dev;
1da177e4 221
5a3e55d6
DL
222 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
223 struct inet6_dev *loopback_idev =
224 in6_dev_get(loopback_dev);
1da177e4
LT
225 if (loopback_idev != NULL) {
226 rt->rt6i_idev = loopback_idev;
227 in6_dev_put(idev);
228 }
229 }
230}
231
232static __inline__ int rt6_check_expired(const struct rt6_info *rt)
233{
234 return (rt->rt6i_flags & RTF_EXPIRES &&
235 time_after(jiffies, rt->rt6i_expires));
236}
237
c71099ac
TG
238static inline int rt6_need_strict(struct in6_addr *daddr)
239{
240 return (ipv6_addr_type(daddr) &
241 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
242}
243
1da177e4 244/*
c71099ac 245 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
246 */
247
248static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
249 int oif,
250 int strict)
251{
252 struct rt6_info *local = NULL;
253 struct rt6_info *sprt;
254
255 if (oif) {
7cc48263 256 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
257 struct net_device *dev = sprt->rt6i_dev;
258 if (dev->ifindex == oif)
259 return sprt;
260 if (dev->flags & IFF_LOOPBACK) {
261 if (sprt->rt6i_idev == NULL ||
262 sprt->rt6i_idev->dev->ifindex != oif) {
263 if (strict && oif)
264 continue;
1ab1457c 265 if (local && (!oif ||
1da177e4
LT
266 local->rt6i_idev->dev->ifindex == oif))
267 continue;
268 }
269 local = sprt;
270 }
271 }
272
273 if (local)
274 return local;
275
276 if (strict)
bdb3289f 277 return ip6_null_entry;
1da177e4
LT
278 }
279 return rt;
280}
281
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the next hop of @rt is not in a NUD_VALID state and at least
 * rtr_probe_interval jiffies have passed since neigh->updated, send a
 * neighbour solicitation to the solicited-node multicast address of the
 * next hop.  A NULL @rt or a route without next hop is ignored.
 *
 * neigh->updated is modified here, so neigh->lock is taken for writing
 * (the previous code wrote the field under the read lock).  The lock is
 * released before ndisc_send_ns() is called.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	/* re-check the state now that the lock is held */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
317
1da177e4 318/*
554cfb7e 319 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 320 */
b6f99a21 321static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
322{
323 struct net_device *dev = rt->rt6i_dev;
161980f4 324 if (!oif || dev->ifindex == oif)
554cfb7e 325 return 2;
161980f4
DM
326 if ((dev->flags & IFF_LOOPBACK) &&
327 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
328 return 1;
329 return 0;
554cfb7e 330}
1da177e4 331
b6f99a21 332static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 333{
554cfb7e 334 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 335 int m;
4d0c5911
YH
336 if (rt->rt6i_flags & RTF_NONEXTHOP ||
337 !(rt->rt6i_flags & RTF_GATEWAY))
338 m = 1;
339 else if (neigh) {
554cfb7e
YH
340 read_lock_bh(&neigh->lock);
341 if (neigh->nud_state & NUD_VALID)
4d0c5911 342 m = 2;
398bcbeb
YH
343#ifdef CONFIG_IPV6_ROUTER_PREF
344 else if (neigh->nud_state & NUD_FAILED)
345 m = 0;
346#endif
347 else
ea73ee23 348 m = 1;
554cfb7e 349 read_unlock_bh(&neigh->lock);
398bcbeb
YH
350 } else
351 m = 0;
554cfb7e 352 return m;
1da177e4
LT
353}
354
554cfb7e
YH
355static int rt6_score_route(struct rt6_info *rt, int oif,
356 int strict)
1da177e4 357{
4d0c5911 358 int m, n;
1ab1457c 359
4d0c5911 360 m = rt6_check_dev(rt, oif);
77d16f45 361 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 362 return -1;
ebacaaa0
YH
363#ifdef CONFIG_IPV6_ROUTER_PREF
364 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
365#endif
4d0c5911 366 n = rt6_check_neigh(rt);
557e92ef 367 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
368 return -1;
369 return m;
370}
371
f11e6659
DM
372static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
373 int *mpri, struct rt6_info *match)
554cfb7e 374{
f11e6659
DM
375 int m;
376
377 if (rt6_check_expired(rt))
378 goto out;
379
380 m = rt6_score_route(rt, oif, strict);
381 if (m < 0)
382 goto out;
383
384 if (m > *mpri) {
385 if (strict & RT6_LOOKUP_F_REACHABLE)
386 rt6_probe(match);
387 *mpri = m;
388 match = rt;
389 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
390 rt6_probe(rt);
391 }
392
393out:
394 return match;
395}
396
397static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
398 struct rt6_info *rr_head,
399 u32 metric, int oif, int strict)
400{
401 struct rt6_info *rt, *match;
554cfb7e 402 int mpri = -1;
1da177e4 403
f11e6659
DM
404 match = NULL;
405 for (rt = rr_head; rt && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
408 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 411
f11e6659
DM
412 return match;
413}
1da177e4 414
f11e6659
DM
415static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
416{
417 struct rt6_info *match, *rt0;
1da177e4 418
f11e6659
DM
419 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
420 __FUNCTION__, fn->leaf, oif);
554cfb7e 421
f11e6659
DM
422 rt0 = fn->rr_ptr;
423 if (!rt0)
424 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 425
f11e6659 426 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 427
554cfb7e 428 if (!match &&
f11e6659
DM
429 (strict & RT6_LOOKUP_F_REACHABLE)) {
430 struct rt6_info *next = rt0->u.dst.rt6_next;
431
554cfb7e 432 /* no entries matched; do round-robin */
f11e6659
DM
433 if (!next || next->rt6i_metric != rt0->rt6i_metric)
434 next = fn->leaf;
435
436 if (next != rt0)
437 fn->rr_ptr = next;
1da177e4 438 }
1da177e4 439
f11e6659
DM
440 RT6_TRACE("%s() => %p\n",
441 __FUNCTION__, match);
1da177e4 442
bdb3289f 443 return (match ? match : ip6_null_entry);
1da177e4
LT
444}
445
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route information option received on @dev from @gwaddr.
 *
 * @opt points at the option (interpreted as struct route_info) and @len
 * is its length in bytes.  After validating length/prefix_len, the
 * matching route-info route is looked up and then deleted (lifetime 0),
 * created (none exists and lifetime != 0) or has its preference flags
 * refreshed.  A lifetime of 0xffffffff means "no expiry"; any other
 * value is clamped so that HZ * lifetime cannot overflow.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev->nd_net;
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* zero lifetime: withdraw an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
525
982f56f3
YH
526#define BACKTRACK(saddr) \
527do { \
bdb3289f 528 if (rt == ip6_null_entry) { \
982f56f3 529 struct fib6_node *pn; \
e0eda7bb 530 while (1) { \
982f56f3
YH
531 if (fn->fn_flags & RTN_TL_ROOT) \
532 goto out; \
533 pn = fn->parent; \
534 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
8bce65b9 535 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
982f56f3
YH
536 else \
537 fn = pn; \
538 if (fn->fn_flags & RTN_RTINFO) \
539 goto restart; \
c71099ac 540 } \
c71099ac 541 } \
982f56f3 542} while(0)
c71099ac
TG
543
544static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
545 struct flowi *fl, int flags)
1da177e4
LT
546{
547 struct fib6_node *fn;
548 struct rt6_info *rt;
549
c71099ac
TG
550 read_lock_bh(&table->tb6_lock);
551 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552restart:
553 rt = fn->leaf;
77d16f45 554 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 555 BACKTRACK(&fl->fl6_src);
c71099ac 556out:
03f49f34 557 dst_use(&rt->u.dst, jiffies);
c71099ac 558 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
559 return rt;
560
561}
562
606a2b48
DL
563struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
564 struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
c71099ac
TG
571 },
572 },
573 };
574 struct dst_entry *dst;
77d16f45 575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 576
adaa70bb
TG
577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
606a2b48 582 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
1da177e4
LT
588 return NULL;
589}
590
7159039a
YH
591EXPORT_SYMBOL(rt6_lookup);
592
/*
 * ip6_ins_rt() must be called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed.  In any case, if the caller does not hold a
 * reference to it, the route may be destroyed.
 */
598
86872cb5 599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
600{
601 int err;
c71099ac 602 struct fib6_table *table;
1da177e4 603
c71099ac
TG
604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
86872cb5 606 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 607 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
608
609 return err;
610}
611
40e22e8f
TG
612int ip6_ins_rt(struct rt6_info *rt)
613{
4d1169c1 614 struct nl_info info = {
5578689a 615 .nl_net = rt->rt6i_dev->nd_net,
4d1169c1 616 };
528c4ceb 617 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
618}
619
95a9a5ba
YH
620static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 struct in6_addr *saddr)
1da177e4 622{
1da177e4
LT
623 struct rt6_info *rt;
624
625 /*
626 * Clone the route.
627 */
628
629 rt = ip6_rt_copy(ort);
630
631 if (rt) {
58c4fb86
YH
632 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
633 if (rt->rt6i_dst.plen != 128 &&
634 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 636 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 637 }
1da177e4 638
58c4fb86 639 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
640 rt->rt6i_dst.plen = 128;
641 rt->rt6i_flags |= RTF_CACHE;
642 rt->u.dst.flags |= DST_HOST;
643
644#ifdef CONFIG_IPV6_SUBTREES
645 if (rt->rt6i_src.plen && saddr) {
646 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 rt->rt6i_src.plen = 128;
648 }
649#endif
650
651 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
652
95a9a5ba 653 }
1da177e4 654
95a9a5ba
YH
655 return rt;
656}
1da177e4 657
299d9939
YH
658static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659{
660 struct rt6_info *rt = ip6_rt_copy(ort);
661 if (rt) {
662 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 rt->rt6i_dst.plen = 128;
664 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
665 rt->u.dst.flags |= DST_HOST;
666 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667 }
668 return rt;
669}
670
4acad72d 671static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
8ce11e6a 672 struct flowi *fl, int flags)
1da177e4
LT
673{
674 struct fib6_node *fn;
519fbd87 675 struct rt6_info *rt, *nrt;
c71099ac 676 int strict = 0;
1da177e4 677 int attempts = 3;
519fbd87 678 int err;
ea659e07 679 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 680
77d16f45 681 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
682
683relookup:
c71099ac 684 read_lock_bh(&table->tb6_lock);
1da177e4 685
8238dd06 686restart_2:
c71099ac 687 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
688
689restart:
4acad72d 690 rt = rt6_select(fn, oif, strict | reachable);
982f56f3 691 BACKTRACK(&fl->fl6_src);
bdb3289f 692 if (rt == ip6_null_entry ||
8238dd06 693 rt->rt6i_flags & RTF_CACHE)
1ddef044 694 goto out;
1da177e4 695
fb9de91e 696 dst_hold(&rt->u.dst);
c71099ac 697 read_unlock_bh(&table->tb6_lock);
fb9de91e 698
519fbd87 699 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 700 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
701 else {
702#if CLONE_OFFLINK_ROUTE
c71099ac 703 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
704#else
705 goto out2;
706#endif
707 }
e40cf353 708
519fbd87 709 dst_release(&rt->u.dst);
bdb3289f 710 rt = nrt ? : ip6_null_entry;
1da177e4 711
519fbd87
YH
712 dst_hold(&rt->u.dst);
713 if (nrt) {
40e22e8f 714 err = ip6_ins_rt(nrt);
519fbd87 715 if (!err)
1da177e4 716 goto out2;
1da177e4 717 }
1da177e4 718
519fbd87
YH
719 if (--attempts <= 0)
720 goto out2;
721
722 /*
c71099ac 723 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
724 * released someone could insert this route. Relookup.
725 */
726 dst_release(&rt->u.dst);
727 goto relookup;
728
729out:
8238dd06
YH
730 if (reachable) {
731 reachable = 0;
732 goto restart_2;
733 }
519fbd87 734 dst_hold(&rt->u.dst);
c71099ac 735 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
736out2:
737 rt->u.dst.lastuse = jiffies;
738 rt->u.dst.__use++;
c71099ac
TG
739
740 return rt;
1da177e4
LT
741}
742
4acad72d
PE
743static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
744 struct flowi *fl, int flags)
745{
746 return ip6_pol_route(table, fl->iif, fl, flags);
747}
748
c71099ac
TG
749void ip6_route_input(struct sk_buff *skb)
750{
0660e03f 751 struct ipv6hdr *iph = ipv6_hdr(skb);
5578689a 752 struct net *net = skb->dev->nd_net;
adaa70bb 753 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
754 struct flowi fl = {
755 .iif = skb->dev->ifindex,
756 .nl_u = {
757 .ip6_u = {
758 .daddr = iph->daddr,
759 .saddr = iph->saddr,
90bcaf7b 760 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
761 },
762 },
1ab1457c 763 .mark = skb->mark,
c71099ac
TG
764 .proto = iph->nexthdr,
765 };
adaa70bb
TG
766
767 if (rt6_need_strict(&iph->daddr))
768 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 769
5578689a 770 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
c71099ac
TG
771}
772
773static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
774 struct flowi *fl, int flags)
1da177e4 775{
4acad72d 776 return ip6_pol_route(table, fl->oif, fl, flags);
c71099ac
TG
777}
778
779struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
780{
781 int flags = 0;
782
783 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 784 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 785
adaa70bb
TG
786 if (!ipv6_addr_any(&fl->fl6_src))
787 flags |= RT6_LOOKUP_F_HAS_SADDR;
788
58f09b78 789 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
1da177e4
LT
790}
791
7159039a 792EXPORT_SYMBOL(ip6_route_output);
1da177e4 793
14e50e57
DM
794int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
795{
796 struct rt6_info *ort = (struct rt6_info *) *dstp;
797 struct rt6_info *rt = (struct rt6_info *)
798 dst_alloc(&ip6_dst_blackhole_ops);
799 struct dst_entry *new = NULL;
800
801 if (rt) {
802 new = &rt->u.dst;
803
804 atomic_set(&new->__refcnt, 1);
805 new->__use = 1;
352e512c
HX
806 new->input = dst_discard;
807 new->output = dst_discard;
14e50e57
DM
808
809 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
810 new->dev = ort->u.dst.dev;
811 if (new->dev)
812 dev_hold(new->dev);
813 rt->rt6i_idev = ort->rt6i_idev;
814 if (rt->rt6i_idev)
815 in6_dev_hold(rt->rt6i_idev);
816 rt->rt6i_expires = 0;
817
818 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
819 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
820 rt->rt6i_metric = 0;
821
822 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
823#ifdef CONFIG_IPV6_SUBTREES
824 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
825#endif
826
827 dst_free(new);
828 }
829
830 dst_release(*dstp);
831 *dstp = new;
832 return (new ? 0 : -ENOMEM);
833}
834EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
835
1da177e4
LT
836/*
837 * Destination cache support functions
838 */
839
840static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
841{
842 struct rt6_info *rt;
843
844 rt = (struct rt6_info *) dst;
845
846 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
847 return dst;
848
849 return NULL;
850}
851
852static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
853{
854 struct rt6_info *rt = (struct rt6_info *) dst;
855
856 if (rt) {
857 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 858 ip6_del_rt(rt);
1da177e4
LT
859 else
860 dst_release(dst);
861 }
862 return NULL;
863}
864
865static void ip6_link_failure(struct sk_buff *skb)
866{
867 struct rt6_info *rt;
868
869 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
870
871 rt = (struct rt6_info *) skb->dst;
872 if (rt) {
873 if (rt->rt6i_flags&RTF_CACHE) {
874 dst_set_expires(&rt->u.dst, 0);
875 rt->rt6i_flags |= RTF_EXPIRES;
876 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
877 rt->rt6i_node->fn_sernum = -1;
878 }
879}
880
881static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
882{
883 struct rt6_info *rt6 = (struct rt6_info*)dst;
884
885 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
886 rt6->rt6i_flags |= RTF_MODIFIED;
887 if (mtu < IPV6_MIN_MTU) {
888 mtu = IPV6_MIN_MTU;
889 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
890 }
891 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 892 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
893 }
894}
895
1da177e4
LT
896static int ipv6_get_mtu(struct net_device *dev);
897
5578689a 898static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
899{
900 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
901
5578689a
DL
902 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
903 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
904
905 /*
1ab1457c
YH
906 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
907 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
908 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
909 * rely only on pmtu discovery"
910 */
911 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
912 mtu = IPV6_MAXPLEN;
913 return mtu;
914}
915
3b00944c
YH
916static struct dst_entry *icmp6_dst_gc_list;
917static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 918
3b00944c 919struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 920 struct neighbour *neigh,
3b00944c 921 struct in6_addr *addr)
1da177e4
LT
922{
923 struct rt6_info *rt;
924 struct inet6_dev *idev = in6_dev_get(dev);
5578689a 925 struct net *net = dev->nd_net;
1da177e4
LT
926
927 if (unlikely(idev == NULL))
928 return NULL;
929
930 rt = ip6_dst_alloc();
931 if (unlikely(rt == NULL)) {
932 in6_dev_put(idev);
933 goto out;
934 }
935
936 dev_hold(dev);
937 if (neigh)
938 neigh_hold(neigh);
939 else
940 neigh = ndisc_get_neigh(dev, addr);
941
942 rt->rt6i_dev = dev;
943 rt->rt6i_idev = idev;
944 rt->rt6i_nexthop = neigh;
945 atomic_set(&rt->u.dst.__refcnt, 1);
946 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
947 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 948 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 949 rt->u.dst.output = ip6_output;
1da177e4
LT
950
951#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
952 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
953 ? DST_HOST
1da177e4
LT
954 : 0;
955 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
956 rt->rt6i_dst.plen = 128;
957#endif
958
3b00944c
YH
959 spin_lock_bh(&icmp6_dst_lock);
960 rt->u.dst.next = icmp6_dst_gc_list;
961 icmp6_dst_gc_list = &rt->u.dst;
962 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 963
5578689a 964 fib6_force_start_gc(net);
1da177e4
LT
965
966out:
40aa7b90 967 return &rt->u.dst;
1da177e4
LT
968}
969
3b00944c 970int icmp6_dst_gc(int *more)
1da177e4
LT
971{
972 struct dst_entry *dst, *next, **pprev;
973 int freed;
974
975 next = NULL;
1ab1457c 976 freed = 0;
5d0bbeeb 977
3b00944c
YH
978 spin_lock_bh(&icmp6_dst_lock);
979 pprev = &icmp6_dst_gc_list;
5d0bbeeb 980
1da177e4
LT
981 while ((dst = *pprev) != NULL) {
982 if (!atomic_read(&dst->__refcnt)) {
983 *pprev = dst->next;
984 dst_free(dst);
985 freed++;
986 } else {
987 pprev = &dst->next;
988 (*more)++;
989 }
990 }
991
3b00944c 992 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 993
1da177e4
LT
994 return freed;
995}
996
569d3645 997static int ip6_dst_gc(struct dst_ops *ops)
1da177e4
LT
998{
999 static unsigned expire = 30*HZ;
1000 static unsigned long last_gc;
1001 unsigned long now = jiffies;
1002
4990509f
DL
1003 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1004 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1da177e4
LT
1005 goto out;
1006
1007 expire++;
5b7c931d 1008 fib6_run_gc(expire, &init_net);
1da177e4
LT
1009 last_gc = now;
1010 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
4990509f 1011 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1da177e4
LT
1012
1013out:
4990509f
DL
1014 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1015 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1da177e4
LT
1016}
1017
1018/* Clean host part of a prefix. Not necessary in radix tree,
1019 but results in cleaner routing tables.
1020
1021 Remove it only when all the things will work!
1022 */
1023
1024static int ipv6_get_mtu(struct net_device *dev)
1025{
1026 int mtu = IPV6_MIN_MTU;
1027 struct inet6_dev *idev;
1028
1029 idev = in6_dev_get(dev);
1030 if (idev) {
1031 mtu = idev->cnf.mtu6;
1032 in6_dev_put(idev);
1033 }
1034 return mtu;
1035}
1036
1037int ipv6_get_hoplimit(struct net_device *dev)
1038{
1039 int hoplimit = ipv6_devconf.hop_limit;
1040 struct inet6_dev *idev;
1041
1042 idev = in6_dev_get(dev);
1043 if (idev) {
1044 hoplimit = idev->cnf.hop_limit;
1045 in6_dev_put(idev);
1046 }
1047 return hoplimit;
1048}
1049
1050/*
1051 *
1052 */
1053
86872cb5 1054int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1055{
1056 int err;
5578689a 1057 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1058 struct rt6_info *rt = NULL;
1059 struct net_device *dev = NULL;
1060 struct inet6_dev *idev = NULL;
c71099ac 1061 struct fib6_table *table;
1da177e4
LT
1062 int addr_type;
1063
86872cb5 1064 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1065 return -EINVAL;
1066#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1067 if (cfg->fc_src_len)
1da177e4
LT
1068 return -EINVAL;
1069#endif
86872cb5 1070 if (cfg->fc_ifindex) {
1da177e4 1071 err = -ENODEV;
5578689a 1072 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1073 if (!dev)
1074 goto out;
1075 idev = in6_dev_get(dev);
1076 if (!idev)
1077 goto out;
1078 }
1079
86872cb5
TG
1080 if (cfg->fc_metric == 0)
1081 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1082
5578689a 1083 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1084 if (table == NULL) {
1085 err = -ENOBUFS;
1086 goto out;
1087 }
1088
1da177e4
LT
1089 rt = ip6_dst_alloc();
1090
1091 if (rt == NULL) {
1092 err = -ENOMEM;
1093 goto out;
1094 }
1095
1096 rt->u.dst.obsolete = -1;
86872cb5 1097 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1098
86872cb5
TG
1099 if (cfg->fc_protocol == RTPROT_UNSPEC)
1100 cfg->fc_protocol = RTPROT_BOOT;
1101 rt->rt6i_protocol = cfg->fc_protocol;
1102
1103 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1104
1105 if (addr_type & IPV6_ADDR_MULTICAST)
1106 rt->u.dst.input = ip6_mc_input;
1107 else
1108 rt->u.dst.input = ip6_forward;
1109
1110 rt->u.dst.output = ip6_output;
1111
86872cb5
TG
1112 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1113 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1114 if (rt->rt6i_dst.plen == 128)
1115 rt->u.dst.flags = DST_HOST;
1116
1117#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1118 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1119 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1120#endif
1121
86872cb5 1122 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1123
1124 /* We cannot add true routes via loopback here,
1125 they would result in kernel looping; promote them to reject routes
1126 */
86872cb5 1127 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1128 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1129 /* hold loopback dev/idev if we haven't done so. */
5578689a 1130 if (dev != net->loopback_dev) {
1da177e4
LT
1131 if (dev) {
1132 dev_put(dev);
1133 in6_dev_put(idev);
1134 }
5578689a 1135 dev = net->loopback_dev;
1da177e4
LT
1136 dev_hold(dev);
1137 idev = in6_dev_get(dev);
1138 if (!idev) {
1139 err = -ENODEV;
1140 goto out;
1141 }
1142 }
1143 rt->u.dst.output = ip6_pkt_discard_out;
1144 rt->u.dst.input = ip6_pkt_discard;
1145 rt->u.dst.error = -ENETUNREACH;
1146 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1147 goto install_route;
1148 }
1149
86872cb5 1150 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1151 struct in6_addr *gw_addr;
1152 int gwa_type;
1153
86872cb5
TG
1154 gw_addr = &cfg->fc_gateway;
1155 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1156 gwa_type = ipv6_addr_type(gw_addr);
1157
1158 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1159 struct rt6_info *grt;
1160
1161 /* IPv6 strictly inhibits using not link-local
1162 addresses as nexthop address.
1163 Otherwise, router will not able to send redirects.
1164 It is very good, but in some (rare!) circumstances
1165 (SIT, PtP, NBMA NOARP links) it is handy to allow
1166 some exceptions. --ANK
1167 */
1168 err = -EINVAL;
1169 if (!(gwa_type&IPV6_ADDR_UNICAST))
1170 goto out;
1171
5578689a 1172 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1173
1174 err = -EHOSTUNREACH;
1175 if (grt == NULL)
1176 goto out;
1177 if (dev) {
1178 if (dev != grt->rt6i_dev) {
1179 dst_release(&grt->u.dst);
1180 goto out;
1181 }
1182 } else {
1183 dev = grt->rt6i_dev;
1184 idev = grt->rt6i_idev;
1185 dev_hold(dev);
1186 in6_dev_hold(grt->rt6i_idev);
1187 }
1188 if (!(grt->rt6i_flags&RTF_GATEWAY))
1189 err = 0;
1190 dst_release(&grt->u.dst);
1191
1192 if (err)
1193 goto out;
1194 }
1195 err = -EINVAL;
1196 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1197 goto out;
1198 }
1199
1200 err = -ENODEV;
1201 if (dev == NULL)
1202 goto out;
1203
86872cb5 1204 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1205 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1206 if (IS_ERR(rt->rt6i_nexthop)) {
1207 err = PTR_ERR(rt->rt6i_nexthop);
1208 rt->rt6i_nexthop = NULL;
1209 goto out;
1210 }
1211 }
1212
86872cb5 1213 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1214
1215install_route:
86872cb5
TG
1216 if (cfg->fc_mx) {
1217 struct nlattr *nla;
1218 int remaining;
1219
1220 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1221 int type = nla_type(nla);
86872cb5
TG
1222
1223 if (type) {
1224 if (type > RTAX_MAX) {
1da177e4
LT
1225 err = -EINVAL;
1226 goto out;
1227 }
86872cb5
TG
1228
1229 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1230 }
1da177e4
LT
1231 }
1232 }
1233
1234 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1235 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1236 if (!rt->u.dst.metrics[RTAX_MTU-1])
1237 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1238 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
5578689a 1239 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1240 rt->u.dst.dev = dev;
1241 rt->rt6i_idev = idev;
c71099ac 1242 rt->rt6i_table = table;
63152fc0
DL
1243
1244 cfg->fc_nlinfo.nl_net = dev->nd_net;
1245
86872cb5 1246 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1247
1248out:
1249 if (dev)
1250 dev_put(dev);
1251 if (idev)
1252 in6_dev_put(idev);
1253 if (rt)
40aa7b90 1254 dst_free(&rt->u.dst);
1da177e4
LT
1255 return err;
1256}
1257
86872cb5 1258static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1259{
1260 int err;
c71099ac 1261 struct fib6_table *table;
1da177e4 1262
bdb3289f 1263 if (rt == ip6_null_entry)
6c813a72
PM
1264 return -ENOENT;
1265
c71099ac
TG
1266 table = rt->rt6i_table;
1267 write_lock_bh(&table->tb6_lock);
1da177e4 1268
86872cb5 1269 err = fib6_del(rt, info);
1da177e4
LT
1270 dst_release(&rt->u.dst);
1271
c71099ac 1272 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1273
1274 return err;
1275}
1276
e0a1ad73
TG
1277int ip6_del_rt(struct rt6_info *rt)
1278{
4d1169c1 1279 struct nl_info info = {
5578689a 1280 .nl_net = rt->rt6i_dev->nd_net,
4d1169c1 1281 };
528c4ceb 1282 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1283}
1284
86872cb5 1285static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1286{
c71099ac 1287 struct fib6_table *table;
1da177e4
LT
1288 struct fib6_node *fn;
1289 struct rt6_info *rt;
1290 int err = -ESRCH;
1291
5578689a 1292 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1293 if (table == NULL)
1294 return err;
1295
1296 read_lock_bh(&table->tb6_lock);
1da177e4 1297
c71099ac 1298 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1299 &cfg->fc_dst, cfg->fc_dst_len,
1300 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1301
1da177e4 1302 if (fn) {
7cc48263 1303 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1304 if (cfg->fc_ifindex &&
1da177e4 1305 (rt->rt6i_dev == NULL ||
86872cb5 1306 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1307 continue;
86872cb5
TG
1308 if (cfg->fc_flags & RTF_GATEWAY &&
1309 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1310 continue;
86872cb5 1311 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1312 continue;
1313 dst_hold(&rt->u.dst);
c71099ac 1314 read_unlock_bh(&table->tb6_lock);
1da177e4 1315
86872cb5 1316 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1317 }
1318 }
c71099ac 1319 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1320
1321 return err;
1322}
1323
1324/*
1325 * Handle redirects
1326 */
a6279458
YH
1327struct ip6rd_flowi {
1328 struct flowi fl;
1329 struct in6_addr gateway;
1330};
1331
1332static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1333 struct flowi *fl,
1334 int flags)
1da177e4 1335{
a6279458
YH
1336 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1337 struct rt6_info *rt;
e843b9e1 1338 struct fib6_node *fn;
c71099ac 1339
1da177e4 1340 /*
e843b9e1
YH
1341 * Get the "current" route for this destination and
1342 * check if the redirect has come from approriate router.
1343 *
1344 * RFC 2461 specifies that redirects should only be
1345 * accepted if they come from the nexthop to the target.
1346 * Due to the way the routes are chosen, this notion
1347 * is a bit fuzzy and one might need to check all possible
1348 * routes.
1da177e4 1349 */
1da177e4 1350
c71099ac 1351 read_lock_bh(&table->tb6_lock);
a6279458 1352 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1353restart:
7cc48263 1354 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1355 /*
1356 * Current route is on-link; redirect is always invalid.
1357 *
1358 * Seems, previous statement is not true. It could
1359 * be node, which looks for us as on-link (f.e. proxy ndisc)
1360 * But then router serving it might decide, that we should
1361 * know truth 8)8) --ANK (980726).
1362 */
1363 if (rt6_check_expired(rt))
1364 continue;
1365 if (!(rt->rt6i_flags & RTF_GATEWAY))
1366 continue;
a6279458 1367 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1368 continue;
a6279458 1369 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1370 continue;
1371 break;
1372 }
a6279458 1373
cb15d9c2 1374 if (!rt)
bdb3289f 1375 rt = ip6_null_entry;
cb15d9c2
YH
1376 BACKTRACK(&fl->fl6_src);
1377out:
a6279458
YH
1378 dst_hold(&rt->u.dst);
1379
c71099ac 1380 read_unlock_bh(&table->tb6_lock);
e843b9e1 1381
a6279458
YH
1382 return rt;
1383};
1384
1385static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1386 struct in6_addr *src,
1387 struct in6_addr *gateway,
1388 struct net_device *dev)
1389{
adaa70bb 1390 int flags = RT6_LOOKUP_F_HAS_SADDR;
5578689a 1391 struct net *net = dev->nd_net;
a6279458
YH
1392 struct ip6rd_flowi rdfl = {
1393 .fl = {
1394 .oif = dev->ifindex,
1395 .nl_u = {
1396 .ip6_u = {
1397 .daddr = *dest,
1398 .saddr = *src,
1399 },
1400 },
1401 },
1402 .gateway = *gateway,
1403 };
adaa70bb
TG
1404
1405 if (rt6_need_strict(dest))
1406 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1407
5578689a 1408 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1409 flags, __ip6_route_redirect);
a6279458
YH
1410}
1411
1412void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1413 struct in6_addr *saddr,
1414 struct neighbour *neigh, u8 *lladdr, int on_link)
1415{
1416 struct rt6_info *rt, *nrt = NULL;
1417 struct netevent_redirect netevent;
1418
1419 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1420
bdb3289f 1421 if (rt == ip6_null_entry) {
1da177e4
LT
1422 if (net_ratelimit())
1423 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1424 "for redirect target\n");
a6279458 1425 goto out;
1da177e4
LT
1426 }
1427
1da177e4
LT
1428 /*
1429 * We have finally decided to accept it.
1430 */
1431
1ab1457c 1432 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1433 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1434 NEIGH_UPDATE_F_OVERRIDE|
1435 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1436 NEIGH_UPDATE_F_ISROUTER))
1437 );
1438
1439 /*
1440 * Redirect received -> path was valid.
1441 * Look, redirects are sent only in response to data packets,
1442 * so that this nexthop apparently is reachable. --ANK
1443 */
1444 dst_confirm(&rt->u.dst);
1445
1446 /* Duplicate redirect: silently ignore. */
1447 if (neigh == rt->u.dst.neighbour)
1448 goto out;
1449
1450 nrt = ip6_rt_copy(rt);
1451 if (nrt == NULL)
1452 goto out;
1453
1454 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1455 if (on_link)
1456 nrt->rt6i_flags &= ~RTF_GATEWAY;
1457
1458 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1459 nrt->rt6i_dst.plen = 128;
1460 nrt->u.dst.flags |= DST_HOST;
1461
1462 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1463 nrt->rt6i_nexthop = neigh_clone(neigh);
1464 /* Reset pmtu, it may be better */
1465 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
5578689a
DL
1466 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1467 dst_mtu(&nrt->u.dst));
1da177e4 1468
40e22e8f 1469 if (ip6_ins_rt(nrt))
1da177e4
LT
1470 goto out;
1471
8d71740c
TT
1472 netevent.old = &rt->u.dst;
1473 netevent.new = &nrt->u.dst;
1474 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1475
1da177e4 1476 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1477 ip6_del_rt(rt);
1da177e4
LT
1478 return;
1479 }
1480
1481out:
1ab1457c 1482 dst_release(&rt->u.dst);
1da177e4
LT
1483 return;
1484}
1485
1486/*
1487 * Handle ICMP "packet too big" messages
1488 * i.e. Path MTU discovery
1489 */
1490
1491void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1492 struct net_device *dev, u32 pmtu)
1493{
1494 struct rt6_info *rt, *nrt;
5578689a 1495 struct net *net = dev->nd_net;
1da177e4
LT
1496 int allfrag = 0;
1497
5578689a 1498 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1499 if (rt == NULL)
1500 return;
1501
1502 if (pmtu >= dst_mtu(&rt->u.dst))
1503 goto out;
1504
1505 if (pmtu < IPV6_MIN_MTU) {
1506 /*
1ab1457c 1507 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1508 * MTU (1280) and a fragment header should always be included
1509 * after a node receiving Too Big message reporting PMTU is
1510 * less than the IPv6 Minimum Link MTU.
1511 */
1512 pmtu = IPV6_MIN_MTU;
1513 allfrag = 1;
1514 }
1515
1516 /* New mtu received -> path was valid.
1517 They are sent only in response to data packets,
1518 so that this nexthop apparently is reachable. --ANK
1519 */
1520 dst_confirm(&rt->u.dst);
1521
1522 /* Host route. If it is static, it would be better
1523 not to override it, but add new one, so that
1524 when cache entry will expire old pmtu
1525 would return automatically.
1526 */
1527 if (rt->rt6i_flags & RTF_CACHE) {
1528 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1529 if (allfrag)
1530 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1531 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1532 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1533 goto out;
1534 }
1535
1536 /* Network route.
1537 Two cases are possible:
1538 1. It is connected route. Action: COW
1539 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1540 */
d5315b50 1541 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1542 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1543 else
1544 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1545
d5315b50 1546 if (nrt) {
a1e78363
YH
1547 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1548 if (allfrag)
1549 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1550
1551 /* According to RFC 1981, detecting PMTU increase shouldn't be
1552 * happened within 5 mins, the recommended timer is 10 mins.
1553 * Here this route expiration time is set to ip6_rt_mtu_expires
1554 * which is 10 mins. After 10 mins the decreased pmtu is expired
1555 * and detecting PMTU increase will be automatically happened.
1556 */
5578689a 1557 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1558 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1559
40e22e8f 1560 ip6_ins_rt(nrt);
1da177e4 1561 }
1da177e4
LT
1562out:
1563 dst_release(&rt->u.dst);
1564}
1565
1566/*
1567 * Misc support functions
1568 */
1569
1570static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1571{
1572 struct rt6_info *rt = ip6_dst_alloc();
1573
1574 if (rt) {
1575 rt->u.dst.input = ort->u.dst.input;
1576 rt->u.dst.output = ort->u.dst.output;
1577
1578 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1579 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1580 rt->u.dst.dev = ort->u.dst.dev;
1581 if (rt->u.dst.dev)
1582 dev_hold(rt->u.dst.dev);
1583 rt->rt6i_idev = ort->rt6i_idev;
1584 if (rt->rt6i_idev)
1585 in6_dev_hold(rt->rt6i_idev);
1586 rt->u.dst.lastuse = jiffies;
1587 rt->rt6i_expires = 0;
1588
1589 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1590 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1591 rt->rt6i_metric = 0;
1592
1593 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1594#ifdef CONFIG_IPV6_SUBTREES
1595 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1596#endif
c71099ac 1597 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1598 }
1599 return rt;
1600}
1601
70ceb4f5 1602#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1603static struct rt6_info *rt6_get_route_info(struct net *net,
1604 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1605 struct in6_addr *gwaddr, int ifindex)
1606{
1607 struct fib6_node *fn;
1608 struct rt6_info *rt = NULL;
c71099ac
TG
1609 struct fib6_table *table;
1610
efa2cea0 1611 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1612 if (table == NULL)
1613 return NULL;
70ceb4f5 1614
c71099ac
TG
1615 write_lock_bh(&table->tb6_lock);
1616 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1617 if (!fn)
1618 goto out;
1619
7cc48263 1620 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1621 if (rt->rt6i_dev->ifindex != ifindex)
1622 continue;
1623 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1624 continue;
1625 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1626 continue;
1627 dst_hold(&rt->u.dst);
1628 break;
1629 }
1630out:
c71099ac 1631 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1632 return rt;
1633}
1634
efa2cea0
DL
1635static struct rt6_info *rt6_add_route_info(struct net *net,
1636 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1637 struct in6_addr *gwaddr, int ifindex,
1638 unsigned pref)
1639{
86872cb5
TG
1640 struct fib6_config cfg = {
1641 .fc_table = RT6_TABLE_INFO,
238fc7ea 1642 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1643 .fc_ifindex = ifindex,
1644 .fc_dst_len = prefixlen,
1645 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1646 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1647 .fc_nlinfo.pid = 0,
1648 .fc_nlinfo.nlh = NULL,
1649 .fc_nlinfo.nl_net = net,
86872cb5
TG
1650 };
1651
1652 ipv6_addr_copy(&cfg.fc_dst, prefix);
1653 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1654
e317da96
YH
1655 /* We should treat it as a default route if prefix length is 0. */
1656 if (!prefixlen)
86872cb5 1657 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1658
86872cb5 1659 ip6_route_add(&cfg);
70ceb4f5 1660
efa2cea0 1661 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1662}
1663#endif
1664
1da177e4 1665struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1666{
1da177e4 1667 struct rt6_info *rt;
c71099ac 1668 struct fib6_table *table;
1da177e4 1669
5578689a 1670 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
c71099ac
TG
1671 if (table == NULL)
1672 return NULL;
1da177e4 1673
c71099ac 1674 write_lock_bh(&table->tb6_lock);
7cc48263 1675 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1676 if (dev == rt->rt6i_dev &&
045927ff 1677 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1678 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1679 break;
1680 }
1681 if (rt)
1682 dst_hold(&rt->u.dst);
c71099ac 1683 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1684 return rt;
1685}
1686
c7dc89c0
FT
1687EXPORT_SYMBOL(rt6_get_dflt_router);
1688
1da177e4 1689struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1690 struct net_device *dev,
1691 unsigned int pref)
1da177e4 1692{
86872cb5
TG
1693 struct fib6_config cfg = {
1694 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1695 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1696 .fc_ifindex = dev->ifindex,
1697 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1698 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1699 .fc_nlinfo.pid = 0,
1700 .fc_nlinfo.nlh = NULL,
1701 .fc_nlinfo.nl_net = dev->nd_net,
86872cb5 1702 };
1da177e4 1703
86872cb5 1704 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1705
86872cb5 1706 ip6_route_add(&cfg);
1da177e4 1707
1da177e4
LT
1708 return rt6_get_dflt_router(gwaddr, dev);
1709}
1710
7b4da532 1711void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1712{
1713 struct rt6_info *rt;
c71099ac
TG
1714 struct fib6_table *table;
1715
1716 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1717 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1718 if (table == NULL)
1719 return;
1da177e4
LT
1720
1721restart:
c71099ac 1722 read_lock_bh(&table->tb6_lock);
7cc48263 1723 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1724 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1725 dst_hold(&rt->u.dst);
c71099ac 1726 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1727 ip6_del_rt(rt);
1da177e4
LT
1728 goto restart;
1729 }
1730 }
c71099ac 1731 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1732}
1733
5578689a
DL
1734static void rtmsg_to_fib6_config(struct net *net,
1735 struct in6_rtmsg *rtmsg,
86872cb5
TG
1736 struct fib6_config *cfg)
1737{
1738 memset(cfg, 0, sizeof(*cfg));
1739
1740 cfg->fc_table = RT6_TABLE_MAIN;
1741 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1742 cfg->fc_metric = rtmsg->rtmsg_metric;
1743 cfg->fc_expires = rtmsg->rtmsg_info;
1744 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1745 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1746 cfg->fc_flags = rtmsg->rtmsg_flags;
1747
5578689a 1748 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1749
86872cb5
TG
1750 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1751 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1752 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1753}
1754
5578689a 1755int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1756{
86872cb5 1757 struct fib6_config cfg;
1da177e4
LT
1758 struct in6_rtmsg rtmsg;
1759 int err;
1760
1761 switch(cmd) {
1762 case SIOCADDRT: /* Add a route */
1763 case SIOCDELRT: /* Delete a route */
1764 if (!capable(CAP_NET_ADMIN))
1765 return -EPERM;
1766 err = copy_from_user(&rtmsg, arg,
1767 sizeof(struct in6_rtmsg));
1768 if (err)
1769 return -EFAULT;
86872cb5 1770
5578689a 1771 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1772
1da177e4
LT
1773 rtnl_lock();
1774 switch (cmd) {
1775 case SIOCADDRT:
86872cb5 1776 err = ip6_route_add(&cfg);
1da177e4
LT
1777 break;
1778 case SIOCDELRT:
86872cb5 1779 err = ip6_route_del(&cfg);
1da177e4
LT
1780 break;
1781 default:
1782 err = -EINVAL;
1783 }
1784 rtnl_unlock();
1785
1786 return err;
3ff50b79 1787 }
1da177e4
LT
1788
1789 return -EINVAL;
1790}
1791
1792/*
1793 * Drop the packet on the floor
1794 */
1795
50eb431d 1796static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1da177e4 1797{
612f09e8
YH
1798 int type;
1799 switch (ipstats_mib_noroutes) {
1800 case IPSTATS_MIB_INNOROUTES:
0660e03f 1801 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1802 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1803 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1804 break;
1805 }
1806 /* FALLTHROUGH */
1807 case IPSTATS_MIB_OUTNOROUTES:
1808 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1809 break;
1810 }
9ce8ade0 1811 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1812 kfree_skb(skb);
1813 return 0;
1814}
1815
9ce8ade0
TG
1816static int ip6_pkt_discard(struct sk_buff *skb)
1817{
612f09e8 1818 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1819}
1820
20380731 1821static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1822{
1823 skb->dev = skb->dst->dev;
612f09e8 1824 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1825}
1826
6723ab54
DM
1827#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1828
9ce8ade0
TG
1829static int ip6_pkt_prohibit(struct sk_buff *skb)
1830{
612f09e8 1831 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1832}
1833
1834static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1835{
1836 skb->dev = skb->dst->dev;
612f09e8 1837 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1838}
1839
6723ab54
DM
1840#endif
1841
1da177e4
LT
1842/*
1843 * Allocate a dst for local (unicast / anycast) address.
1844 */
1845
1846struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1847 const struct in6_addr *addr,
1848 int anycast)
1849{
5578689a 1850 struct net *net = idev->dev->nd_net;
1da177e4
LT
1851 struct rt6_info *rt = ip6_dst_alloc();
1852
1853 if (rt == NULL)
1854 return ERR_PTR(-ENOMEM);
1855
5578689a 1856 dev_hold(net->loopback_dev);
1da177e4
LT
1857 in6_dev_hold(idev);
1858
1859 rt->u.dst.flags = DST_HOST;
1860 rt->u.dst.input = ip6_input;
1861 rt->u.dst.output = ip6_output;
5578689a 1862 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1863 rt->rt6i_idev = idev;
1864 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1865 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1866 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1867 rt->u.dst.obsolete = -1;
1868
1869 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1870 if (anycast)
1871 rt->rt6i_flags |= RTF_ANYCAST;
1872 else
1da177e4
LT
1873 rt->rt6i_flags |= RTF_LOCAL;
1874 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1875 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1876 dst_free(&rt->u.dst);
1da177e4
LT
1877 return ERR_PTR(-ENOMEM);
1878 }
1879
1880 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1881 rt->rt6i_dst.plen = 128;
5578689a 1882 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1883
1884 atomic_set(&rt->u.dst.__refcnt, 1);
1885
1886 return rt;
1887}
1888
1889static int fib6_ifdown(struct rt6_info *rt, void *arg)
1890{
1891 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
bdb3289f 1892 rt != ip6_null_entry) {
1da177e4
LT
1893 RT6_TRACE("deleted by ifdown %p\n", rt);
1894 return -1;
1895 }
1896 return 0;
1897}
1898
f3db4851 1899void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1900{
f3db4851 1901 fib6_clean_all(net, fib6_ifdown, 0, dev);
1da177e4
LT
1902}
1903
/* Argument handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1909
1910static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1911{
1912 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1913 struct inet6_dev *idev;
5578689a 1914 struct net *net = arg->dev->nd_net;
1da177e4
LT
1915
1916 /* In IPv6 pmtu discovery is not optional,
1917 so that RTAX_MTU lock cannot disable it.
1918 We still use this lock to block changes
1919 caused by addrconf/ndisc.
1920 */
1921
1922 idev = __in6_dev_get(arg->dev);
1923 if (idev == NULL)
1924 return 0;
1925
1926 /* For administrative MTU increase, there is no way to discover
1927 IPv6 PMTU increase, so PMTU increase should be updated here.
1928 Since RFC 1981 doesn't include administrative MTU increase
1929 update PMTU increase is a MUST. (i.e. jumbo frame)
1930 */
1931 /*
1932 If new MTU is less than route PMTU, this new MTU will be the
1933 lowest MTU in the path, update the route PMTU to reflect PMTU
1934 decreases; if new MTU is greater than route PMTU, and the
1935 old MTU is the lowest MTU in the path, update the route PMTU
1936 to reflect the increase. In this case if the other nodes' MTU
1937 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1938 PMTU discouvery.
1939 */
1940 if (rt->rt6i_dev == arg->dev &&
1941 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 1942 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 1943 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1944 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1945 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 1946 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 1947 }
1da177e4
LT
1948 return 0;
1949}
1950
1951void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1952{
c71099ac
TG
1953 struct rt6_mtu_change_arg arg = {
1954 .dev = dev,
1955 .mtu = mtu,
1956 };
1da177e4 1957
f3db4851 1958 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1959}
1960
ef7c79ed 1961static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1962 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1963 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1964 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1965 [RTA_PRIORITY] = { .type = NLA_U32 },
1966 [RTA_METRICS] = { .type = NLA_NESTED },
1967};
1968
1969static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1970 struct fib6_config *cfg)
1da177e4 1971{
86872cb5
TG
1972 struct rtmsg *rtm;
1973 struct nlattr *tb[RTA_MAX+1];
1974 int err;
1da177e4 1975
86872cb5
TG
1976 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1977 if (err < 0)
1978 goto errout;
1da177e4 1979
86872cb5
TG
1980 err = -EINVAL;
1981 rtm = nlmsg_data(nlh);
1982 memset(cfg, 0, sizeof(*cfg));
1983
1984 cfg->fc_table = rtm->rtm_table;
1985 cfg->fc_dst_len = rtm->rtm_dst_len;
1986 cfg->fc_src_len = rtm->rtm_src_len;
1987 cfg->fc_flags = RTF_UP;
1988 cfg->fc_protocol = rtm->rtm_protocol;
1989
1990 if (rtm->rtm_type == RTN_UNREACHABLE)
1991 cfg->fc_flags |= RTF_REJECT;
1992
1993 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1994 cfg->fc_nlinfo.nlh = nlh;
2216b483 1995 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
86872cb5
TG
1996
1997 if (tb[RTA_GATEWAY]) {
1998 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1999 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2000 }
86872cb5
TG
2001
2002 if (tb[RTA_DST]) {
2003 int plen = (rtm->rtm_dst_len + 7) >> 3;
2004
2005 if (nla_len(tb[RTA_DST]) < plen)
2006 goto errout;
2007
2008 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2009 }
86872cb5
TG
2010
2011 if (tb[RTA_SRC]) {
2012 int plen = (rtm->rtm_src_len + 7) >> 3;
2013
2014 if (nla_len(tb[RTA_SRC]) < plen)
2015 goto errout;
2016
2017 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2018 }
86872cb5
TG
2019
2020 if (tb[RTA_OIF])
2021 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2022
2023 if (tb[RTA_PRIORITY])
2024 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2025
2026 if (tb[RTA_METRICS]) {
2027 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2028 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2029 }
86872cb5
TG
2030
2031 if (tb[RTA_TABLE])
2032 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2033
2034 err = 0;
2035errout:
2036 return err;
1da177e4
LT
2037}
2038
c127ea2c 2039static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2040{
86872cb5
TG
2041 struct fib6_config cfg;
2042 int err;
1da177e4 2043
86872cb5
TG
2044 err = rtm_to_fib6_config(skb, nlh, &cfg);
2045 if (err < 0)
2046 return err;
2047
2048 return ip6_route_del(&cfg);
1da177e4
LT
2049}
2050
c127ea2c 2051static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2052{
86872cb5
TG
2053 struct fib6_config cfg;
2054 int err;
1da177e4 2055
86872cb5
TG
2056 err = rtm_to_fib6_config(skb, nlh, &cfg);
2057 if (err < 0)
2058 return err;
2059
2060 return ip6_route_add(&cfg);
1da177e4
LT
2061}
2062
339bf98f
TG
2063static inline size_t rt6_nlmsg_size(void)
2064{
2065 return NLMSG_ALIGN(sizeof(struct rtmsg))
2066 + nla_total_size(16) /* RTA_SRC */
2067 + nla_total_size(16) /* RTA_DST */
2068 + nla_total_size(16) /* RTA_GATEWAY */
2069 + nla_total_size(16) /* RTA_PREFSRC */
2070 + nla_total_size(4) /* RTA_TABLE */
2071 + nla_total_size(4) /* RTA_IIF */
2072 + nla_total_size(4) /* RTA_OIF */
2073 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2074 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2075 + nla_total_size(sizeof(struct rta_cacheinfo));
2076}
2077
1da177e4 2078static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2079 struct in6_addr *dst, struct in6_addr *src,
2080 int iif, int type, u32 pid, u32 seq,
2081 int prefix, unsigned int flags)
1da177e4
LT
2082{
2083 struct rtmsg *rtm;
2d7202bf 2084 struct nlmsghdr *nlh;
e3703b3d 2085 long expires;
9e762a4a 2086 u32 table;
1da177e4
LT
2087
2088 if (prefix) { /* user wants prefix routes only */
2089 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2090 /* success since this is not a prefix route */
2091 return 1;
2092 }
2093 }
2094
2d7202bf
TG
2095 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2096 if (nlh == NULL)
26932566 2097 return -EMSGSIZE;
2d7202bf
TG
2098
2099 rtm = nlmsg_data(nlh);
1da177e4
LT
2100 rtm->rtm_family = AF_INET6;
2101 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2102 rtm->rtm_src_len = rt->rt6i_src.plen;
2103 rtm->rtm_tos = 0;
c71099ac 2104 if (rt->rt6i_table)
9e762a4a 2105 table = rt->rt6i_table->tb6_id;
c71099ac 2106 else
9e762a4a
PM
2107 table = RT6_TABLE_UNSPEC;
2108 rtm->rtm_table = table;
2d7202bf 2109 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2110 if (rt->rt6i_flags&RTF_REJECT)
2111 rtm->rtm_type = RTN_UNREACHABLE;
2112 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2113 rtm->rtm_type = RTN_LOCAL;
2114 else
2115 rtm->rtm_type = RTN_UNICAST;
2116 rtm->rtm_flags = 0;
2117 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2118 rtm->rtm_protocol = rt->rt6i_protocol;
2119 if (rt->rt6i_flags&RTF_DYNAMIC)
2120 rtm->rtm_protocol = RTPROT_REDIRECT;
2121 else if (rt->rt6i_flags & RTF_ADDRCONF)
2122 rtm->rtm_protocol = RTPROT_KERNEL;
2123 else if (rt->rt6i_flags&RTF_DEFAULT)
2124 rtm->rtm_protocol = RTPROT_RA;
2125
2126 if (rt->rt6i_flags&RTF_CACHE)
2127 rtm->rtm_flags |= RTM_F_CLONED;
2128
2129 if (dst) {
2d7202bf 2130 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2131 rtm->rtm_dst_len = 128;
1da177e4 2132 } else if (rtm->rtm_dst_len)
2d7202bf 2133 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2134#ifdef CONFIG_IPV6_SUBTREES
2135 if (src) {
2d7202bf 2136 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2137 rtm->rtm_src_len = 128;
1da177e4 2138 } else if (rtm->rtm_src_len)
2d7202bf 2139 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2140#endif
2141 if (iif)
2d7202bf 2142 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2143 else if (dst) {
2144 struct in6_addr saddr_buf;
5e5f3f0f
YH
2145 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2146 dst, &saddr_buf) == 0)
2d7202bf 2147 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2148 }
2d7202bf 2149
1da177e4 2150 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2151 goto nla_put_failure;
2152
1da177e4 2153 if (rt->u.dst.neighbour)
2d7202bf
TG
2154 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2155
1da177e4 2156 if (rt->u.dst.dev)
2d7202bf
TG
2157 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2158
2159 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2160
2161 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2162 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2163 expires, rt->u.dst.error) < 0)
2164 goto nla_put_failure;
2d7202bf
TG
2165
2166 return nlmsg_end(skb, nlh);
2167
2168nla_put_failure:
26932566
PM
2169 nlmsg_cancel(skb, nlh);
2170 return -EMSGSIZE;
1da177e4
LT
2171}
2172
1b43af54 2173int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2174{
2175 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2176 int prefix;
2177
2d7202bf
TG
2178 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2179 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2180 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2181 } else
2182 prefix = 0;
2183
2184 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2185 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2186 prefix, NLM_F_MULTI);
1da177e4
LT
2187}
2188
c127ea2c 2189static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2190{
b854272b 2191 struct net *net = in_skb->sk->sk_net;
ab364a6f
TG
2192 struct nlattr *tb[RTA_MAX+1];
2193 struct rt6_info *rt;
1da177e4 2194 struct sk_buff *skb;
ab364a6f 2195 struct rtmsg *rtm;
1da177e4 2196 struct flowi fl;
ab364a6f 2197 int err, iif = 0;
1da177e4 2198
ab364a6f
TG
2199 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2200 if (err < 0)
2201 goto errout;
1da177e4 2202
ab364a6f 2203 err = -EINVAL;
1da177e4 2204 memset(&fl, 0, sizeof(fl));
1da177e4 2205
ab364a6f
TG
2206 if (tb[RTA_SRC]) {
2207 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2208 goto errout;
2209
2210 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2211 }
2212
2213 if (tb[RTA_DST]) {
2214 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2215 goto errout;
2216
2217 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2218 }
2219
2220 if (tb[RTA_IIF])
2221 iif = nla_get_u32(tb[RTA_IIF]);
2222
2223 if (tb[RTA_OIF])
2224 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2225
2226 if (iif) {
2227 struct net_device *dev;
5578689a 2228 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2229 if (!dev) {
2230 err = -ENODEV;
ab364a6f 2231 goto errout;
1da177e4
LT
2232 }
2233 }
2234
ab364a6f
TG
2235 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2236 if (skb == NULL) {
2237 err = -ENOBUFS;
2238 goto errout;
2239 }
1da177e4 2240
ab364a6f
TG
2241 /* Reserve room for dummy headers, this skb can pass
2242 through good chunk of routing engine.
2243 */
459a98ed 2244 skb_reset_mac_header(skb);
ab364a6f 2245 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2246
ab364a6f 2247 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2248 skb->dst = &rt->u.dst;
2249
ab364a6f 2250 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2251 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2252 nlh->nlmsg_seq, 0, 0);
1da177e4 2253 if (err < 0) {
ab364a6f
TG
2254 kfree_skb(skb);
2255 goto errout;
1da177e4
LT
2256 }
2257
5578689a 2258 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2259errout:
1da177e4 2260 return err;
1da177e4
LT
2261}
2262
86872cb5 2263void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2264{
2265 struct sk_buff *skb;
5578689a 2266 struct net *net = info->nl_net;
528c4ceb
DL
2267 u32 seq;
2268 int err;
2269
2270 err = -ENOBUFS;
2271 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2272
339bf98f 2273 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2274 if (skb == NULL)
2275 goto errout;
2276
528c4ceb
DL
2277 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2278 event, info->pid, seq, 0, 0);
26932566
PM
2279 if (err < 0) {
2280 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2281 WARN_ON(err == -EMSGSIZE);
2282 kfree_skb(skb);
2283 goto errout;
2284 }
5578689a
DL
2285 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2286 info->nlh, gfp_any());
21713ebc
TG
2287errout:
2288 if (err < 0)
5578689a 2289 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2290}
2291
2292/*
2293 * /proc
2294 */
2295
2296#ifdef CONFIG_PROC_FS
2297
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * Buffer bookkeeping for a /proc reader.
 * NOTE(review): neither this struct nor RT6_INFO_LEN is referenced by the
 * seq_file based code visible below -- presumably a leftover; confirm
 * before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2308
2309static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2310{
33120b30 2311 struct seq_file *m = p_arg;
1da177e4 2312
33120b30
AD
2313 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2314 rt->rt6i_dst.plen);
1da177e4
LT
2315
2316#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2317 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2318 rt->rt6i_src.plen);
1da177e4 2319#else
33120b30 2320 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2321#endif
2322
2323 if (rt->rt6i_nexthop) {
33120b30
AD
2324 seq_printf(m, NIP6_SEQFMT,
2325 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2326 } else {
33120b30 2327 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2328 }
33120b30
AD
2329 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2330 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2331 rt->u.dst.__use, rt->rt6i_flags,
2332 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2333 return 0;
2334}
2335
33120b30 2336static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2337{
f3db4851
DL
2338 struct net *net = (struct net *)m->private;
2339 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2340 return 0;
2341}
1da177e4 2342
33120b30
AD
2343static int ipv6_route_open(struct inode *inode, struct file *file)
2344{
f3db4851
DL
2345 struct net *net = get_proc_net(inode);
2346 if (!net)
2347 return -ENXIO;
2348 return single_open(file, ipv6_route_show, net);
2349}
2350
2351static int ipv6_route_release(struct inode *inode, struct file *file)
2352{
2353 struct seq_file *seq = file->private_data;
2354 struct net *net = seq->private;
2355 put_net(net);
2356 return single_release(inode, file);
1da177e4
LT
2357}
2358
33120b30
AD
2359static const struct file_operations ipv6_route_proc_fops = {
2360 .owner = THIS_MODULE,
2361 .open = ipv6_route_open,
2362 .read = seq_read,
2363 .llseek = seq_lseek,
f3db4851 2364 .release = ipv6_route_release,
33120b30
AD
2365};
2366
1da177e4
LT
2367static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2368{
69ddb805 2369 struct net *net = (struct net *)seq->private;
1da177e4 2370 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2371 net->ipv6.rt6_stats->fib_nodes,
2372 net->ipv6.rt6_stats->fib_route_nodes,
2373 net->ipv6.rt6_stats->fib_rt_alloc,
2374 net->ipv6.rt6_stats->fib_rt_entries,
2375 net->ipv6.rt6_stats->fib_rt_cache,
c572872f 2376 atomic_read(&ip6_dst_ops.entries),
69ddb805 2377 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2378
2379 return 0;
2380}
2381
2382static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2383{
69ddb805
DL
2384 struct net *net = get_proc_net(inode);
2385 return single_open(file, rt6_stats_seq_show, net);
2386}
2387
2388static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2389{
2390 struct seq_file *seq = file->private_data;
2391 struct net *net = (struct net *)seq->private;
2392 put_net(net);
2393 return single_release(inode, file);
1da177e4
LT
2394}
2395
9a32144e 2396static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2397 .owner = THIS_MODULE,
2398 .open = rt6_stats_seq_open,
2399 .read = seq_read,
2400 .llseek = seq_lseek,
69ddb805 2401 .release = rt6_stats_seq_release,
1da177e4
LT
2402};
2403#endif /* CONFIG_PROC_FS */
2404
2405#ifdef CONFIG_SYSCTL
2406
1da177e4
LT
2407static
2408int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2409 void __user *buffer, size_t *lenp, loff_t *ppos)
2410{
5b7c931d
DL
2411 struct net *net = current->nsproxy->net_ns;
2412 int delay = net->ipv6.sysctl.flush_delay;
1da177e4
LT
2413 if (write) {
2414 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
5b7c931d 2415 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2416 return 0;
2417 } else
2418 return -EINVAL;
2419}
2420
760f2d01 2421ctl_table ipv6_route_table_template[] = {
1ab1457c 2422 {
1da177e4 2423 .procname = "flush",
4990509f 2424 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2425 .maxlen = sizeof(int),
89c8b3a1 2426 .mode = 0200,
1ab1457c 2427 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2428 },
2429 {
2430 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2431 .procname = "gc_thresh",
1ab1457c 2432 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2433 .maxlen = sizeof(int),
2434 .mode = 0644,
1ab1457c 2435 .proc_handler = &proc_dointvec,
1da177e4
LT
2436 },
2437 {
2438 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2439 .procname = "max_size",
4990509f 2440 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2441 .maxlen = sizeof(int),
2442 .mode = 0644,
1ab1457c 2443 .proc_handler = &proc_dointvec,
1da177e4
LT
2444 },
2445 {
2446 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2447 .procname = "gc_min_interval",
4990509f 2448 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2449 .maxlen = sizeof(int),
2450 .mode = 0644,
1ab1457c 2451 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2452 .strategy = &sysctl_jiffies,
2453 },
2454 {
2455 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2456 .procname = "gc_timeout",
4990509f 2457 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2458 .maxlen = sizeof(int),
2459 .mode = 0644,
1ab1457c 2460 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2461 .strategy = &sysctl_jiffies,
2462 },
2463 {
2464 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2465 .procname = "gc_interval",
4990509f 2466 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2467 .maxlen = sizeof(int),
2468 .mode = 0644,
1ab1457c 2469 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2470 .strategy = &sysctl_jiffies,
2471 },
2472 {
2473 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2474 .procname = "gc_elasticity",
4990509f 2475 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2476 .maxlen = sizeof(int),
2477 .mode = 0644,
1ab1457c 2478 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2479 .strategy = &sysctl_jiffies,
2480 },
2481 {
2482 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2483 .procname = "mtu_expires",
4990509f 2484 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2485 .maxlen = sizeof(int),
2486 .mode = 0644,
1ab1457c 2487 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2488 .strategy = &sysctl_jiffies,
2489 },
2490 {
2491 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2492 .procname = "min_adv_mss",
4990509f 2493 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2494 .maxlen = sizeof(int),
2495 .mode = 0644,
1ab1457c 2496 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2497 .strategy = &sysctl_jiffies,
2498 },
2499 {
2500 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2501 .procname = "gc_min_interval_ms",
4990509f 2502 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2503 .maxlen = sizeof(int),
2504 .mode = 0644,
1ab1457c 2505 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2506 .strategy = &sysctl_ms_jiffies,
2507 },
2508 { .ctl_name = 0 }
2509};
2510
760f2d01
DL
2511struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2512{
2513 struct ctl_table *table;
2514
2515 table = kmemdup(ipv6_route_table_template,
2516 sizeof(ipv6_route_table_template),
2517 GFP_KERNEL);
5ee09105
YH
2518
2519 if (table) {
2520 table[0].data = &net->ipv6.sysctl.flush_delay;
2521 /* table[1].data will be handled when we have
2522 routes per namespace */
2523 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2524 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2525 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2526 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2527 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2528 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2529 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2530 }
2531
760f2d01
DL
2532 return table;
2533}
1da177e4
LT
2534#endif
2535
cdb18761
DL
/*
 * Per-namespace setup: create the "ipv6_route" and "rt6_stats" proc
 * entries for @net when procfs is configured.  The creation results are
 * not checked; the function always returns 0.
 */
static int ip6_route_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0,
			     &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO,
			     &rt6_stats_seq_fops);
#endif

	return 0;
}
2544
2545static void ip6_route_net_exit(struct net *net)
2546{
2547#ifdef CONFIG_PROC_FS
2548 proc_net_remove(net, "ipv6_route");
2549 proc_net_remove(net, "rt6_stats");
2550#endif
5578689a 2551 rt6_ifdown(net, NULL);
cdb18761
DL
2552}
2553
2554static struct pernet_operations ip6_route_net_ops = {
2555 .init = ip6_route_net_init,
2556 .exit = ip6_route_net_exit,
2557};
2558
433d49c3 2559int __init ip6_route_init(void)
1da177e4 2560{
433d49c3
DL
2561 int ret;
2562
e5d679f3
AD
2563 ip6_dst_ops.kmem_cachep =
2564 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b
DL
2565 SLAB_HWCACHE_ALIGN, NULL);
2566 if (!ip6_dst_ops.kmem_cachep)
2567 return -ENOMEM;
2568
14e50e57
DM
2569 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2570
bdb3289f
DL
2571 ret = -ENOMEM;
2572 ip6_null_entry = kmemdup(&ip6_null_entry_template,
2573 sizeof(*ip6_null_entry), GFP_KERNEL);
2574 if (!ip6_null_entry)
2575 goto out_kmem_cache;
2576 ip6_null_entry->u.dst.path = (struct dst_entry *)ip6_null_entry;
2577
2578#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2579 ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2580 sizeof(*ip6_prohibit_entry), GFP_KERNEL);
2581 if (!ip6_prohibit_entry)
2582 goto out_ip6_null_entry;
2583 ip6_prohibit_entry->u.dst.path = (struct dst_entry *)ip6_prohibit_entry;
2584
2585 ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2586 sizeof(*ip6_blk_hole_entry), GFP_KERNEL);
2587 if (!ip6_blk_hole_entry)
2588 goto out_ip6_prohibit_entry;
2589 ip6_blk_hole_entry->u.dst.path = (struct dst_entry *)ip6_blk_hole_entry;
2590#endif
2591
433d49c3
DL
2592 ret = fib6_init();
2593 if (ret)
bdb3289f 2594 goto out_ip6_blk_hole_entry;
433d49c3 2595
433d49c3
DL
2596 ret = xfrm6_init();
2597 if (ret)
cdb18761 2598 goto out_fib6_init;
c35b7e72 2599
433d49c3
DL
2600 ret = fib6_rules_init();
2601 if (ret)
2602 goto xfrm6_init;
7e5449c2 2603
433d49c3
DL
2604 ret = -ENOBUFS;
2605 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2606 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2607 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2608 goto fib6_rules_init;
c127ea2c 2609
cdb18761
DL
2610 ret = register_pernet_subsys(&ip6_route_net_ops);
2611 if (ret)
2612 goto fib6_rules_init;
433d49c3
DL
2613out:
2614 return ret;
2615
2616fib6_rules_init:
433d49c3
DL
2617 fib6_rules_cleanup();
2618xfrm6_init:
433d49c3 2619 xfrm6_fini();
433d49c3 2620out_fib6_init:
433d49c3 2621 fib6_gc_cleanup();
bdb3289f
DL
2622out_ip6_blk_hole_entry:
2623#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624 kfree(ip6_blk_hole_entry);
2625out_ip6_prohibit_entry:
2626 kfree(ip6_prohibit_entry);
2627out_ip6_null_entry:
2628#endif
2629 kfree(ip6_null_entry);
433d49c3
DL
2630out_kmem_cache:
2631 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2632 goto out;
1da177e4
LT
2633}
2634
2635void ip6_route_cleanup(void)
2636{
cdb18761 2637 unregister_pernet_subsys(&ip6_route_net_ops);
101367c2 2638 fib6_rules_cleanup();
1da177e4 2639 xfrm6_fini();
1da177e4
LT
2640 fib6_gc_cleanup();
2641 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
bdb3289f
DL
2642
2643 kfree(ip6_null_entry);
2644#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2645 kfree(ip6_prohibit_entry);
2646 kfree(ip6_blk_hole_entry);
2647#endif
1da177e4 2648}