]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[NETNS][IPV6] route6 - make route6 per namespace
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that both expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Compile-time switch tested by ip6_pol_route(): non-zero selects the
 * rt6_alloc_clone() path for routes that do not need a COW copy.
 */
#define CLONE_OFFLINK_ROUTE 0
76
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void             ip6_dst_destroy(struct dst_entry *);
81 static void             ip6_dst_ifdown(struct dst_entry *,
82                                        struct net_device *dev, int how);
83 static int               ip6_dst_gc(struct dst_ops *ops);
84
85 static int              ip6_pkt_discard(struct sk_buff *skb);
86 static int              ip6_pkt_discard_out(struct sk_buff *skb);
87 static void             ip6_link_failure(struct sk_buff *skb);
88 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92                                            struct in6_addr *prefix, int prefixlen,
93                                            struct in6_addr *gwaddr, int ifindex,
94                                            unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96                                            struct in6_addr *prefix, int prefixlen,
97                                            struct in6_addr *gwaddr, int ifindex);
98 #endif
99
100 static struct dst_ops ip6_dst_ops = {
101         .family                 =       AF_INET6,
102         .protocol               =       __constant_htons(ETH_P_IPV6),
103         .gc                     =       ip6_dst_gc,
104         .gc_thresh              =       1024,
105         .check                  =       ip6_dst_check,
106         .destroy                =       ip6_dst_destroy,
107         .ifdown                 =       ip6_dst_ifdown,
108         .negative_advice        =       ip6_negative_advice,
109         .link_failure           =       ip6_link_failure,
110         .update_pmtu            =       ip6_rt_update_pmtu,
111         .local_out              =       ip6_local_out,
112         .entry_size             =       sizeof(struct rt6_info),
113         .entries                =       ATOMIC_INIT(0),
114 };
115
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 {
118 }
119
120 static struct dst_ops ip6_dst_blackhole_ops = {
121         .family                 =       AF_INET6,
122         .protocol               =       __constant_htons(ETH_P_IPV6),
123         .destroy                =       ip6_dst_destroy,
124         .check                  =       ip6_dst_check,
125         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
126         .entry_size             =       sizeof(struct rt6_info),
127         .entries                =       ATOMIC_INIT(0),
128 };
129
130 struct rt6_info ip6_null_entry = {
131         .u = {
132                 .dst = {
133                         .__refcnt       = ATOMIC_INIT(1),
134                         .__use          = 1,
135                         .obsolete       = -1,
136                         .error          = -ENETUNREACH,
137                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
138                         .input          = ip6_pkt_discard,
139                         .output         = ip6_pkt_discard_out,
140                         .ops            = &ip6_dst_ops,
141                         .path           = (struct dst_entry*)&ip6_null_entry,
142                 }
143         },
144         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
145         .rt6i_metric    = ~(u32) 0,
146         .rt6i_ref       = ATOMIC_INIT(1),
147 };
148
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Static "prohibit" entry: same shape as ip6_null_entry but reports
 * -EACCES and uses the ip6_pkt_prohibit*() handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry *)&ip6_prohibit_entry,
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.error		= -EACCES,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
};

/*
 * Static "blackhole" entry: reports -EINVAL and hands packets to
 * dst_discard() on both input and output.
 */
struct rt6_info ip6_blk_hole_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry *)&ip6_blk_hole_entry,
			.input		= dst_discard,
			.output		= dst_discard,
			.error		= -EINVAL,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
};

#endif
193
194 /* allocate dst with ip6_dst_ops */
195 static __inline__ struct rt6_info *ip6_dst_alloc(void)
196 {
197         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
198 }
199
200 static void ip6_dst_destroy(struct dst_entry *dst)
201 {
202         struct rt6_info *rt = (struct rt6_info *)dst;
203         struct inet6_dev *idev = rt->rt6i_idev;
204
205         if (idev != NULL) {
206                 rt->rt6i_idev = NULL;
207                 in6_dev_put(idev);
208         }
209 }
210
211 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
212                            int how)
213 {
214         struct rt6_info *rt = (struct rt6_info *)dst;
215         struct inet6_dev *idev = rt->rt6i_idev;
216         struct net_device *loopback_dev =
217                 dev->nd_net->loopback_dev;
218
219         if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
220                 struct inet6_dev *loopback_idev =
221                         in6_dev_get(loopback_dev);
222                 if (loopback_idev != NULL) {
223                         rt->rt6i_idev = loopback_idev;
224                         in6_dev_put(idev);
225                 }
226         }
227 }
228
229 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
230 {
231         return (rt->rt6i_flags & RTF_EXPIRES &&
232                 time_after(jiffies, rt->rt6i_expires));
233 }
234
235 static inline int rt6_need_strict(struct in6_addr *daddr)
236 {
237         return (ipv6_addr_type(daddr) &
238                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
239 }
240
241 /*
242  *      Route lookup. Any table->tb6_lock is implied.
243  */
244
245 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
246                                                     int oif,
247                                                     int strict)
248 {
249         struct rt6_info *local = NULL;
250         struct rt6_info *sprt;
251
252         if (oif) {
253                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
254                         struct net_device *dev = sprt->rt6i_dev;
255                         if (dev->ifindex == oif)
256                                 return sprt;
257                         if (dev->flags & IFF_LOOPBACK) {
258                                 if (sprt->rt6i_idev == NULL ||
259                                     sprt->rt6i_idev->dev->ifindex != oif) {
260                                         if (strict && oif)
261                                                 continue;
262                                         if (local && (!oif ||
263                                                       local->rt6i_idev->dev->ifindex == oif))
264                                                 continue;
265                                 }
266                                 local = sprt;
267                         }
268                 }
269
270                 if (local)
271                         return local;
272
273                 if (strict)
274                         return &ip6_null_entry;
275         }
276         return rt;
277 }
278
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for the next hop of @rt.
 *
 * Does nothing when @rt is NULL, has no next-hop neighbour, or the
 * neighbour is already in a NUD_VALID state.  Otherwise, if at least
 * rtr_probe_interval has elapsed since the neighbour entry was last
 * updated, a Neighbour Solicitation is sent to the solicited-node
 * multicast address of the neighbour.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; the rate limit is enforced by stamping neigh->updated.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the write side of the lock
	 * is required; a read lock would let concurrent probers race on
	 * the timestamp and defeat the rate limit.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation outside the lock. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
314
315 /*
316  * Default Router Selection (RFC 2461 6.3.6)
317  */
318 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
319 {
320         struct net_device *dev = rt->rt6i_dev;
321         if (!oif || dev->ifindex == oif)
322                 return 2;
323         if ((dev->flags & IFF_LOOPBACK) &&
324             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
325                 return 1;
326         return 0;
327 }
328
329 static inline int rt6_check_neigh(struct rt6_info *rt)
330 {
331         struct neighbour *neigh = rt->rt6i_nexthop;
332         int m;
333         if (rt->rt6i_flags & RTF_NONEXTHOP ||
334             !(rt->rt6i_flags & RTF_GATEWAY))
335                 m = 1;
336         else if (neigh) {
337                 read_lock_bh(&neigh->lock);
338                 if (neigh->nud_state & NUD_VALID)
339                         m = 2;
340 #ifdef CONFIG_IPV6_ROUTER_PREF
341                 else if (neigh->nud_state & NUD_FAILED)
342                         m = 0;
343 #endif
344                 else
345                         m = 1;
346                 read_unlock_bh(&neigh->lock);
347         } else
348                 m = 0;
349         return m;
350 }
351
352 static int rt6_score_route(struct rt6_info *rt, int oif,
353                            int strict)
354 {
355         int m, n;
356
357         m = rt6_check_dev(rt, oif);
358         if (!m && (strict & RT6_LOOKUP_F_IFACE))
359                 return -1;
360 #ifdef CONFIG_IPV6_ROUTER_PREF
361         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
362 #endif
363         n = rt6_check_neigh(rt);
364         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
365                 return -1;
366         return m;
367 }
368
369 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
370                                    int *mpri, struct rt6_info *match)
371 {
372         int m;
373
374         if (rt6_check_expired(rt))
375                 goto out;
376
377         m = rt6_score_route(rt, oif, strict);
378         if (m < 0)
379                 goto out;
380
381         if (m > *mpri) {
382                 if (strict & RT6_LOOKUP_F_REACHABLE)
383                         rt6_probe(match);
384                 *mpri = m;
385                 match = rt;
386         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
387                 rt6_probe(rt);
388         }
389
390 out:
391         return match;
392 }
393
394 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
395                                      struct rt6_info *rr_head,
396                                      u32 metric, int oif, int strict)
397 {
398         struct rt6_info *rt, *match;
399         int mpri = -1;
400
401         match = NULL;
402         for (rt = rr_head; rt && rt->rt6i_metric == metric;
403              rt = rt->u.dst.rt6_next)
404                 match = find_match(rt, oif, strict, &mpri, match);
405         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
406              rt = rt->u.dst.rt6_next)
407                 match = find_match(rt, oif, strict, &mpri, match);
408
409         return match;
410 }
411
412 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
413 {
414         struct rt6_info *match, *rt0;
415
416         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417                   __FUNCTION__, fn->leaf, oif);
418
419         rt0 = fn->rr_ptr;
420         if (!rt0)
421                 fn->rr_ptr = rt0 = fn->leaf;
422
423         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
424
425         if (!match &&
426             (strict & RT6_LOOKUP_F_REACHABLE)) {
427                 struct rt6_info *next = rt0->u.dst.rt6_next;
428
429                 /* no entries matched; do round-robin */
430                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431                         next = fn->leaf;
432
433                 if (next != rt0)
434                         fn->rr_ptr = next;
435         }
436
437         RT6_TRACE("%s() => %p\n",
438                   __FUNCTION__, match);
439
440         return (match ? match : &ip6_null_entry);
441 }
442
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process one Route Information option received on @dev.
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address for the resulting route
 *
 * Depending on the advertised lifetime the matching route is deleted
 * (lifetime 0), created (no existing route, non-zero lifetime) or has
 * its preference and expiry refreshed.
 *
 * Returns 0 when the option was processed, -EINVAL when it is
 * malformed or carries the reserved preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev->nd_net;
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/*
	 * RFC 4191, section 2.3: a Route Information Option carrying the
	 * reserved preference value MUST be ignored, not treated as
	 * medium preference.
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Lifetime 0 withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference obtained from the get/add helper. */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
522
/*
 * BACKTRACK(saddr) - walk back up the tree after a failed selection.
 *
 * Expands in place and relies on the enclosing function providing the
 * locals `rt` and `fn` and the labels `restart` and `out`.  It only
 * acts when rt is &ip6_null_entry: starting at fn it climbs to the
 * parent (or, when the parent has a subtree other than fn, looks saddr
 * up in that subtree) until it reaches a node flagged RTN_RTINFO
 * (jumps to `restart`) or a node flagged RTN_TL_ROOT (jumps to `out`).
 */
#define BACKTRACK(saddr)						\
do {									\
	if (rt == &ip6_null_entry) {					\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
540
541 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542                                              struct flowi *fl, int flags)
543 {
544         struct fib6_node *fn;
545         struct rt6_info *rt;
546
547         read_lock_bh(&table->tb6_lock);
548         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549 restart:
550         rt = fn->leaf;
551         rt = rt6_device_match(rt, fl->oif, flags);
552         BACKTRACK(&fl->fl6_src);
553 out:
554         dst_use(&rt->u.dst, jiffies);
555         read_unlock_bh(&table->tb6_lock);
556         return rt;
557
558 }
559
560 struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
561                             struct in6_addr *saddr, int oif, int strict)
562 {
563         struct flowi fl = {
564                 .oif = oif,
565                 .nl_u = {
566                         .ip6_u = {
567                                 .daddr = *daddr,
568                         },
569                 },
570         };
571         struct dst_entry *dst;
572         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
573
574         if (saddr) {
575                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
576                 flags |= RT6_LOOKUP_F_HAS_SADDR;
577         }
578
579         dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
580         if (dst->error == 0)
581                 return (struct rt6_info *) dst;
582
583         dst_release(dst);
584
585         return NULL;
586 }
587
588 EXPORT_SYMBOL(rt6_lookup);
589
590 /* ip6_ins_rt is called with FREE table->tb6_lock.
591    It takes new route entry, the addition fails by any reason the
592    route is freed. In any case, if caller does not hold it, it may
593    be destroyed.
594  */
595
596 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
597 {
598         int err;
599         struct fib6_table *table;
600
601         table = rt->rt6i_table;
602         write_lock_bh(&table->tb6_lock);
603         err = fib6_add(&table->tb6_root, rt, info);
604         write_unlock_bh(&table->tb6_lock);
605
606         return err;
607 }
608
609 int ip6_ins_rt(struct rt6_info *rt)
610 {
611         struct nl_info info = {
612                 .nl_net = rt->rt6i_dev->nd_net,
613         };
614         return __ip6_ins_rt(rt, &info);
615 }
616
617 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618                                       struct in6_addr *saddr)
619 {
620         struct rt6_info *rt;
621
622         /*
623          *      Clone the route.
624          */
625
626         rt = ip6_rt_copy(ort);
627
628         if (rt) {
629                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630                         if (rt->rt6i_dst.plen != 128 &&
631                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632                                 rt->rt6i_flags |= RTF_ANYCAST;
633                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
634                 }
635
636                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
637                 rt->rt6i_dst.plen = 128;
638                 rt->rt6i_flags |= RTF_CACHE;
639                 rt->u.dst.flags |= DST_HOST;
640
641 #ifdef CONFIG_IPV6_SUBTREES
642                 if (rt->rt6i_src.plen && saddr) {
643                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644                         rt->rt6i_src.plen = 128;
645                 }
646 #endif
647
648                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
650         }
651
652         return rt;
653 }
654
655 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656 {
657         struct rt6_info *rt = ip6_rt_copy(ort);
658         if (rt) {
659                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660                 rt->rt6i_dst.plen = 128;
661                 rt->rt6i_flags |= RTF_CACHE;
662                 rt->u.dst.flags |= DST_HOST;
663                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664         }
665         return rt;
666 }
667
668 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
669                                             struct flowi *fl, int flags)
670 {
671         struct fib6_node *fn;
672         struct rt6_info *rt, *nrt;
673         int strict = 0;
674         int attempts = 3;
675         int err;
676         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
677
678         strict |= flags & RT6_LOOKUP_F_IFACE;
679
680 relookup:
681         read_lock_bh(&table->tb6_lock);
682
683 restart_2:
684         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
685
686 restart:
687         rt = rt6_select(fn, oif, strict | reachable);
688         BACKTRACK(&fl->fl6_src);
689         if (rt == &ip6_null_entry ||
690             rt->rt6i_flags & RTF_CACHE)
691                 goto out;
692
693         dst_hold(&rt->u.dst);
694         read_unlock_bh(&table->tb6_lock);
695
696         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
697                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
698         else {
699 #if CLONE_OFFLINK_ROUTE
700                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
701 #else
702                 goto out2;
703 #endif
704         }
705
706         dst_release(&rt->u.dst);
707         rt = nrt ? : &ip6_null_entry;
708
709         dst_hold(&rt->u.dst);
710         if (nrt) {
711                 err = ip6_ins_rt(nrt);
712                 if (!err)
713                         goto out2;
714         }
715
716         if (--attempts <= 0)
717                 goto out2;
718
719         /*
720          * Race condition! In the gap, when table->tb6_lock was
721          * released someone could insert this route.  Relookup.
722          */
723         dst_release(&rt->u.dst);
724         goto relookup;
725
726 out:
727         if (reachable) {
728                 reachable = 0;
729                 goto restart_2;
730         }
731         dst_hold(&rt->u.dst);
732         read_unlock_bh(&table->tb6_lock);
733 out2:
734         rt->u.dst.lastuse = jiffies;
735         rt->u.dst.__use++;
736
737         return rt;
738 }
739
740 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
741                                             struct flowi *fl, int flags)
742 {
743         return ip6_pol_route(table, fl->iif, fl, flags);
744 }
745
746 void ip6_route_input(struct sk_buff *skb)
747 {
748         struct ipv6hdr *iph = ipv6_hdr(skb);
749         struct net *net = skb->dev->nd_net;
750         int flags = RT6_LOOKUP_F_HAS_SADDR;
751         struct flowi fl = {
752                 .iif = skb->dev->ifindex,
753                 .nl_u = {
754                         .ip6_u = {
755                                 .daddr = iph->daddr,
756                                 .saddr = iph->saddr,
757                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
758                         },
759                 },
760                 .mark = skb->mark,
761                 .proto = iph->nexthdr,
762         };
763
764         if (rt6_need_strict(&iph->daddr))
765                 flags |= RT6_LOOKUP_F_IFACE;
766
767         skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
768 }
769
770 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
771                                              struct flowi *fl, int flags)
772 {
773         return ip6_pol_route(table, fl->oif, fl, flags);
774 }
775
776 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
777 {
778         int flags = 0;
779
780         if (rt6_need_strict(&fl->fl6_dst))
781                 flags |= RT6_LOOKUP_F_IFACE;
782
783         if (!ipv6_addr_any(&fl->fl6_src))
784                 flags |= RT6_LOOKUP_F_HAS_SADDR;
785
786         return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
787 }
788
789 EXPORT_SYMBOL(ip6_route_output);
790
791 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
792 {
793         struct rt6_info *ort = (struct rt6_info *) *dstp;
794         struct rt6_info *rt = (struct rt6_info *)
795                 dst_alloc(&ip6_dst_blackhole_ops);
796         struct dst_entry *new = NULL;
797
798         if (rt) {
799                 new = &rt->u.dst;
800
801                 atomic_set(&new->__refcnt, 1);
802                 new->__use = 1;
803                 new->input = dst_discard;
804                 new->output = dst_discard;
805
806                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
807                 new->dev = ort->u.dst.dev;
808                 if (new->dev)
809                         dev_hold(new->dev);
810                 rt->rt6i_idev = ort->rt6i_idev;
811                 if (rt->rt6i_idev)
812                         in6_dev_hold(rt->rt6i_idev);
813                 rt->rt6i_expires = 0;
814
815                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
816                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
817                 rt->rt6i_metric = 0;
818
819                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
820 #ifdef CONFIG_IPV6_SUBTREES
821                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
822 #endif
823
824                 dst_free(new);
825         }
826
827         dst_release(*dstp);
828         *dstp = new;
829         return (new ? 0 : -ENOMEM);
830 }
831 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
832
833 /*
834  *      Destination cache support functions
835  */
836
837 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
838 {
839         struct rt6_info *rt;
840
841         rt = (struct rt6_info *) dst;
842
843         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
844                 return dst;
845
846         return NULL;
847 }
848
849 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
850 {
851         struct rt6_info *rt = (struct rt6_info *) dst;
852
853         if (rt) {
854                 if (rt->rt6i_flags & RTF_CACHE)
855                         ip6_del_rt(rt);
856                 else
857                         dst_release(dst);
858         }
859         return NULL;
860 }
861
862 static void ip6_link_failure(struct sk_buff *skb)
863 {
864         struct rt6_info *rt;
865
866         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
867
868         rt = (struct rt6_info *) skb->dst;
869         if (rt) {
870                 if (rt->rt6i_flags&RTF_CACHE) {
871                         dst_set_expires(&rt->u.dst, 0);
872                         rt->rt6i_flags |= RTF_EXPIRES;
873                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
874                         rt->rt6i_node->fn_sernum = -1;
875         }
876 }
877
878 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
879 {
880         struct rt6_info *rt6 = (struct rt6_info*)dst;
881
882         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
883                 rt6->rt6i_flags |= RTF_MODIFIED;
884                 if (mtu < IPV6_MIN_MTU) {
885                         mtu = IPV6_MIN_MTU;
886                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
887                 }
888                 dst->metrics[RTAX_MTU-1] = mtu;
889                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
890         }
891 }
892
893 static int ipv6_get_mtu(struct net_device *dev);
894
895 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
896 {
897         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
898
899         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
900                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
901
902         /*
903          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
904          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
905          * IPV6_MAXPLEN is also valid and means: "any MSS,
906          * rely only on pmtu discovery"
907          */
908         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
909                 mtu = IPV6_MAXPLEN;
910         return mtu;
911 }
912
913 static struct dst_entry *icmp6_dst_gc_list;
914 static DEFINE_SPINLOCK(icmp6_dst_lock);
915
916 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
917                                   struct neighbour *neigh,
918                                   struct in6_addr *addr)
919 {
920         struct rt6_info *rt;
921         struct inet6_dev *idev = in6_dev_get(dev);
922         struct net *net = dev->nd_net;
923
924         if (unlikely(idev == NULL))
925                 return NULL;
926
927         rt = ip6_dst_alloc();
928         if (unlikely(rt == NULL)) {
929                 in6_dev_put(idev);
930                 goto out;
931         }
932
933         dev_hold(dev);
934         if (neigh)
935                 neigh_hold(neigh);
936         else
937                 neigh = ndisc_get_neigh(dev, addr);
938
939         rt->rt6i_dev      = dev;
940         rt->rt6i_idev     = idev;
941         rt->rt6i_nexthop  = neigh;
942         atomic_set(&rt->u.dst.__refcnt, 1);
943         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
944         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
945         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
946         rt->u.dst.output  = ip6_output;
947
948 #if 0   /* there's no chance to use these for ndisc */
949         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
950                                 ? DST_HOST
951                                 : 0;
952         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
953         rt->rt6i_dst.plen = 128;
954 #endif
955
956         spin_lock_bh(&icmp6_dst_lock);
957         rt->u.dst.next = icmp6_dst_gc_list;
958         icmp6_dst_gc_list = &rt->u.dst;
959         spin_unlock_bh(&icmp6_dst_lock);
960
961         fib6_force_start_gc(net);
962
963 out:
964         return &rt->u.dst;
965 }
966
967 int icmp6_dst_gc(int *more)
968 {
969         struct dst_entry *dst, *next, **pprev;
970         int freed;
971
972         next = NULL;
973         freed = 0;
974
975         spin_lock_bh(&icmp6_dst_lock);
976         pprev = &icmp6_dst_gc_list;
977
978         while ((dst = *pprev) != NULL) {
979                 if (!atomic_read(&dst->__refcnt)) {
980                         *pprev = dst->next;
981                         dst_free(dst);
982                         freed++;
983                 } else {
984                         pprev = &dst->next;
985                         (*more)++;
986                 }
987         }
988
989         spin_unlock_bh(&icmp6_dst_lock);
990
991         return freed;
992 }
993
994 static int ip6_dst_gc(struct dst_ops *ops)
995 {
996         static unsigned expire = 30*HZ;
997         static unsigned long last_gc;
998         unsigned long now = jiffies;
999
1000         if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1001             atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1002                 goto out;
1003
1004         expire++;
1005         fib6_run_gc(expire, &init_net);
1006         last_gc = now;
1007         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1008                 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1009
1010 out:
1011         expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1012         return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1013 }
1014
1015 /* Clean host part of a prefix. Not necessary in radix tree,
1016    but results in cleaner routing tables.
1017
1018    Remove it only when all the things will work!
1019  */
1020
1021 static int ipv6_get_mtu(struct net_device *dev)
1022 {
1023         int mtu = IPV6_MIN_MTU;
1024         struct inet6_dev *idev;
1025
1026         idev = in6_dev_get(dev);
1027         if (idev) {
1028                 mtu = idev->cnf.mtu6;
1029                 in6_dev_put(idev);
1030         }
1031         return mtu;
1032 }
1033
1034 int ipv6_get_hoplimit(struct net_device *dev)
1035 {
1036         int hoplimit = ipv6_devconf.hop_limit;
1037         struct inet6_dev *idev;
1038
1039         idev = in6_dev_get(dev);
1040         if (idev) {
1041                 hoplimit = idev->cnf.hop_limit;
1042                 in6_dev_put(idev);
1043         }
1044         return hoplimit;
1045 }
1046
1047 /*
1048  *
1049  */
1050
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * @cfg: route description.  fc_metric and fc_protocol are rewritten with
 *       defaults when left at 0 / RTPROT_UNSPEC, and fc_nlinfo.nl_net is
 *       overwritten with the namespace of the device finally chosen.
 *
 * Returns the result of __ip6_ins_rt() once the route is fully built, or a
 * negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or the
 * error reported by the neighbour lookup) when validation or setup fails.
 *
 * On the failure path (label "out") the dev/idev references held at that
 * point are dropped and the half-built route is freed.  On the success path
 * dev and idev are stored in the route instead of being released here.
 */
1051 int ip6_route_add(struct fib6_config *cfg)
1052 {
1053         int err;
1054         struct net *net = cfg->fc_nlinfo.nl_net;
1055         struct rt6_info *rt = NULL;
1056         struct net_device *dev = NULL;
1057         struct inet6_dev *idev = NULL;
1058         struct fib6_table *table;
1059         int addr_type;
1060
             /* Prefix lengths are bit counts within a 128-bit address. */
1061         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1062                 return -EINVAL;
             /* A source prefix is only accepted when subtrees are compiled in. */
1063 #ifndef CONFIG_IPV6_SUBTREES
1064         if (cfg->fc_src_len)
1065                 return -EINVAL;
1066 #endif
             /* Explicit interface: take a reference on both dev and idev. */
1067         if (cfg->fc_ifindex) {
1068                 err = -ENODEV;
1069                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1070                 if (!dev)
1071                         goto out;
1072                 idev = in6_dev_get(dev);
1073                 if (!idev)
1074                         goto out;
1075         }
1076
1077         if (cfg->fc_metric == 0)
1078                 cfg->fc_metric = IP6_RT_PRIO_USER;
1079
1080         table = fib6_new_table(net, cfg->fc_table);
1081         if (table == NULL) {
1082                 err = -ENOBUFS;
1083                 goto out;
1084         }
1085
1086         rt = ip6_dst_alloc();
1087
1088         if (rt == NULL) {
1089                 err = -ENOMEM;
1090                 goto out;
1091         }
1092
1093         rt->u.dst.obsolete = -1;
             /* Set unconditionally, whether or not RTF_EXPIRES is in fc_flags. */
1094         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1095
1096         if (cfg->fc_protocol == RTPROT_UNSPEC)
1097                 cfg->fc_protocol = RTPROT_BOOT;
1098         rt->rt6i_protocol = cfg->fc_protocol;
1099
1100         addr_type = ipv6_addr_type(&cfg->fc_dst);
1101
             /* Input handler depends on whether the destination is multicast. */
1102         if (addr_type & IPV6_ADDR_MULTICAST)
1103                 rt->u.dst.input = ip6_mc_input;
1104         else
1105                 rt->u.dst.input = ip6_forward;
1106
1107         rt->u.dst.output = ip6_output;
1108
1109         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1110         rt->rt6i_dst.plen = cfg->fc_dst_len;
1111         if (rt->rt6i_dst.plen == 128)
1112                rt->u.dst.flags = DST_HOST;
1113
1114 #ifdef CONFIG_IPV6_SUBTREES
1115         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1116         rt->rt6i_src.plen = cfg->fc_src_len;
1117 #endif
1118
1119         rt->rt6i_metric = cfg->fc_metric;
1120
1121         /* We cannot add true routes via loopback here,
1122            they would result in kernel looping; promote them to reject routes
1123          */
1124         if ((cfg->fc_flags & RTF_REJECT) ||
1125             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1126                 /* hold loopback dev/idev if we haven't done so. */
1127                 if (dev != net->loopback_dev) {
1128                         if (dev) {
1129                                 dev_put(dev);
1130                                 in6_dev_put(idev);
1131                         }
1132                         dev = net->loopback_dev;
1133                         dev_hold(dev);
1134                         idev = in6_dev_get(dev);
1135                         if (!idev) {
1136                                 err = -ENODEV;
1137                                 goto out;
1138                         }
1139                 }
                     /* Reject routes discard traffic in both directions. */
1140                 rt->u.dst.output = ip6_pkt_discard_out;
1141                 rt->u.dst.input = ip6_pkt_discard;
1142                 rt->u.dst.error = -ENETUNREACH;
1143                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1144                 goto install_route;
1145         }
1146
1147         if (cfg->fc_flags & RTF_GATEWAY) {
1148                 struct in6_addr *gw_addr;
1149                 int gwa_type;
1150
1151                 gw_addr = &cfg->fc_gateway;
1152                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1153                 gwa_type = ipv6_addr_type(gw_addr);
1154
1155                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1156                         struct rt6_info *grt;
1157
1158                         /* IPv6 strictly inhibits using not link-local
1159                            addresses as nexthop address.
1160                            Otherwise, router will not able to send redirects.
1161                            It is very good, but in some (rare!) circumstances
1162                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1163                            some exceptions. --ANK
1164                          */
1165                         err = -EINVAL;
1166                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1167                                 goto out;
1168
                             /* Look up the existing route towards the gateway itself. */
1169                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1170
1171                         err = -EHOSTUNREACH;
1172                         if (grt == NULL)
1173                                 goto out;
1174                         if (dev) {
1175                                 if (dev != grt->rt6i_dev) {
1176                                         dst_release(&grt->u.dst);
1177                                         goto out;
1178                                 }
1179                         } else {
                                     /* No interface given: inherit dev/idev from the gateway route. */
1180                                 dev = grt->rt6i_dev;
1181                                 idev = grt->rt6i_idev;
1182                                 dev_hold(dev);
1183                                 in6_dev_hold(grt->rt6i_idev);
1184                         }
                             /* Accept only if the gateway is reached by a non-gateway route;
                                otherwise err stays -EHOSTUNREACH. */
1185                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1186                                 err = 0;
1187                         dst_release(&grt->u.dst);
1188
1189                         if (err)
1190                                 goto out;
1191                 }
1192                 err = -EINVAL;
1193                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1194                         goto out;
1195         }
1196
1197         err = -ENODEV;
1198         if (dev == NULL)
1199                 goto out;
1200
1201         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1202                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1203                 if (IS_ERR(rt->rt6i_nexthop)) {
1204                         err = PTR_ERR(rt->rt6i_nexthop);
1205                         rt->rt6i_nexthop = NULL;
1206                         goto out;
1207                 }
1208         }
1209
1210         rt->rt6i_flags = cfg->fc_flags;
1211
1212 install_route:
             /* Copy caller-supplied metric attributes; attribute type N lands in
                metrics[N - 1], type 0 is skipped and types above RTAX_MAX are rejected. */
1213         if (cfg->fc_mx) {
1214                 struct nlattr *nla;
1215                 int remaining;
1216
1217                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1218                         int type = nla_type(nla);
1219
1220                         if (type) {
1221                                 if (type > RTAX_MAX) {
1222                                         err = -EINVAL;
1223                                         goto out;
1224                                 }
1225
1226                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1227                         }
1228                 }
1229         }
1230
             /* Fill in defaults for the metrics that are still zero. */
1231         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1232                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1233         if (!rt->u.dst.metrics[RTAX_MTU-1])
1234                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1235         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1236                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
             /* The dev/idev references taken above are stored in the route here. */
1237         rt->u.dst.dev = dev;
1238         rt->rt6i_idev = idev;
1239         rt->rt6i_table = table;
1240
1241         cfg->fc_nlinfo.nl_net = dev->nd_net;
1242
1243         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1244
             /* Error exit: undo whatever was acquired so far. */
1245 out:
1246         if (dev)
1247                 dev_put(dev);
1248         if (idev)
1249                 in6_dev_put(idev);
1250         if (rt)
1251                 dst_free(&rt->u.dst);
1252         return err;
1253 }
1254
1255 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1256 {
1257         int err;
1258         struct fib6_table *table;
1259
1260         if (rt == &ip6_null_entry)
1261                 return -ENOENT;
1262
1263         table = rt->rt6i_table;
1264         write_lock_bh(&table->tb6_lock);
1265
1266         err = fib6_del(rt, info);
1267         dst_release(&rt->u.dst);
1268
1269         write_unlock_bh(&table->tb6_lock);
1270
1271         return err;
1272 }
1273
1274 int ip6_del_rt(struct rt6_info *rt)
1275 {
1276         struct nl_info info = {
1277                 .nl_net = rt->rt6i_dev->nd_net,
1278         };
1279         return __ip6_del_rt(rt, &info);
1280 }
1281
1282 static int ip6_route_del(struct fib6_config *cfg)
1283 {
1284         struct fib6_table *table;
1285         struct fib6_node *fn;
1286         struct rt6_info *rt;
1287         int err = -ESRCH;
1288
1289         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1290         if (table == NULL)
1291                 return err;
1292
1293         read_lock_bh(&table->tb6_lock);
1294
1295         fn = fib6_locate(&table->tb6_root,
1296                          &cfg->fc_dst, cfg->fc_dst_len,
1297                          &cfg->fc_src, cfg->fc_src_len);
1298
1299         if (fn) {
1300                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1301                         if (cfg->fc_ifindex &&
1302                             (rt->rt6i_dev == NULL ||
1303                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1304                                 continue;
1305                         if (cfg->fc_flags & RTF_GATEWAY &&
1306                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1307                                 continue;
1308                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1309                                 continue;
1310                         dst_hold(&rt->u.dst);
1311                         read_unlock_bh(&table->tb6_lock);
1312
1313                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1314                 }
1315         }
1316         read_unlock_bh(&table->tb6_lock);
1317
1318         return err;
1319 }
1320
1321 /*
1322  *      Handle redirects
1323  */
/*
 * Lookup key for redirect handling: a flow plus a gateway address.
 * ip6_route_redirect() passes a pointer to this struct as a struct flowi *
 * and __ip6_route_redirect() casts it back, so fl must stay the first member.
 */
1324 struct ip6rd_flowi {
1325         struct flowi fl;
             /* compared against rt6i_gateway of each candidate route */
1326         struct in6_addr gateway;
1327 };
1328
1329 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1330                                              struct flowi *fl,
1331                                              int flags)
1332 {
1333         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1334         struct rt6_info *rt;
1335         struct fib6_node *fn;
1336
1337         /*
1338          * Get the "current" route for this destination and
1339          * check if the redirect has come from approriate router.
1340          *
1341          * RFC 2461 specifies that redirects should only be
1342          * accepted if they come from the nexthop to the target.
1343          * Due to the way the routes are chosen, this notion
1344          * is a bit fuzzy and one might need to check all possible
1345          * routes.
1346          */
1347
1348         read_lock_bh(&table->tb6_lock);
1349         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1350 restart:
1351         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1352                 /*
1353                  * Current route is on-link; redirect is always invalid.
1354                  *
1355                  * Seems, previous statement is not true. It could
1356                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1357                  * But then router serving it might decide, that we should
1358                  * know truth 8)8) --ANK (980726).
1359                  */
1360                 if (rt6_check_expired(rt))
1361                         continue;
1362                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1363                         continue;
1364                 if (fl->oif != rt->rt6i_dev->ifindex)
1365                         continue;
1366                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1367                         continue;
1368                 break;
1369         }
1370
1371         if (!rt)
1372                 rt = &ip6_null_entry;
1373         BACKTRACK(&fl->fl6_src);
1374 out:
1375         dst_hold(&rt->u.dst);
1376
1377         read_unlock_bh(&table->tb6_lock);
1378
1379         return rt;
1380 };
1381
1382 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1383                                            struct in6_addr *src,
1384                                            struct in6_addr *gateway,
1385                                            struct net_device *dev)
1386 {
1387         int flags = RT6_LOOKUP_F_HAS_SADDR;
1388         struct net *net = dev->nd_net;
1389         struct ip6rd_flowi rdfl = {
1390                 .fl = {
1391                         .oif = dev->ifindex,
1392                         .nl_u = {
1393                                 .ip6_u = {
1394                                         .daddr = *dest,
1395                                         .saddr = *src,
1396                                 },
1397                         },
1398                 },
1399                 .gateway = *gateway,
1400         };
1401
1402         if (rt6_need_strict(dest))
1403                 flags |= RT6_LOOKUP_F_IFACE;
1404
1405         return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1406                                                    flags, __ip6_route_redirect);
1407 }
1408
1409 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1410                   struct in6_addr *saddr,
1411                   struct neighbour *neigh, u8 *lladdr, int on_link)
1412 {
1413         struct rt6_info *rt, *nrt = NULL;
1414         struct netevent_redirect netevent;
1415
1416         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1417
1418         if (rt == &ip6_null_entry) {
1419                 if (net_ratelimit())
1420                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1421                                "for redirect target\n");
1422                 goto out;
1423         }
1424
1425         /*
1426          *      We have finally decided to accept it.
1427          */
1428
1429         neigh_update(neigh, lladdr, NUD_STALE,
1430                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1431                      NEIGH_UPDATE_F_OVERRIDE|
1432                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1433                                      NEIGH_UPDATE_F_ISROUTER))
1434                      );
1435
1436         /*
1437          * Redirect received -> path was valid.
1438          * Look, redirects are sent only in response to data packets,
1439          * so that this nexthop apparently is reachable. --ANK
1440          */
1441         dst_confirm(&rt->u.dst);
1442
1443         /* Duplicate redirect: silently ignore. */
1444         if (neigh == rt->u.dst.neighbour)
1445                 goto out;
1446
1447         nrt = ip6_rt_copy(rt);
1448         if (nrt == NULL)
1449                 goto out;
1450
1451         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1452         if (on_link)
1453                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1454
1455         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1456         nrt->rt6i_dst.plen = 128;
1457         nrt->u.dst.flags |= DST_HOST;
1458
1459         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1460         nrt->rt6i_nexthop = neigh_clone(neigh);
1461         /* Reset pmtu, it may be better */
1462         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1463         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1464                                                         dst_mtu(&nrt->u.dst));
1465
1466         if (ip6_ins_rt(nrt))
1467                 goto out;
1468
1469         netevent.old = &rt->u.dst;
1470         netevent.new = &nrt->u.dst;
1471         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1472
1473         if (rt->rt6i_flags&RTF_CACHE) {
1474                 ip6_del_rt(rt);
1475                 return;
1476         }
1477
1478 out:
1479         dst_release(&rt->u.dst);
1480         return;
1481 }
1482
1483 /*
1484  *      Handle ICMP "packet too big" messages
1485  *      i.e. Path MTU discovery
1486  */
1487
1488 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1489                         struct net_device *dev, u32 pmtu)
1490 {
1491         struct rt6_info *rt, *nrt;
1492         struct net *net = dev->nd_net;
1493         int allfrag = 0;
1494
1495         rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1496         if (rt == NULL)
1497                 return;
1498
1499         if (pmtu >= dst_mtu(&rt->u.dst))
1500                 goto out;
1501
1502         if (pmtu < IPV6_MIN_MTU) {
1503                 /*
1504                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1505                  * MTU (1280) and a fragment header should always be included
1506                  * after a node receiving Too Big message reporting PMTU is
1507                  * less than the IPv6 Minimum Link MTU.
1508                  */
1509                 pmtu = IPV6_MIN_MTU;
1510                 allfrag = 1;
1511         }
1512
1513         /* New mtu received -> path was valid.
1514            They are sent only in response to data packets,
1515            so that this nexthop apparently is reachable. --ANK
1516          */
1517         dst_confirm(&rt->u.dst);
1518
1519         /* Host route. If it is static, it would be better
1520            not to override it, but add new one, so that
1521            when cache entry will expire old pmtu
1522            would return automatically.
1523          */
1524         if (rt->rt6i_flags & RTF_CACHE) {
1525                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1526                 if (allfrag)
1527                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1528                 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1529                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1530                 goto out;
1531         }
1532
1533         /* Network route.
1534            Two cases are possible:
1535            1. It is connected route. Action: COW
1536            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1537          */
1538         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1539                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1540         else
1541                 nrt = rt6_alloc_clone(rt, daddr);
1542
1543         if (nrt) {
1544                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1545                 if (allfrag)
1546                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1547
1548                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1549                  * happened within 5 mins, the recommended timer is 10 mins.
1550                  * Here this route expiration time is set to ip6_rt_mtu_expires
1551                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1552                  * and detecting PMTU increase will be automatically happened.
1553                  */
1554                 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1555                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1556
1557                 ip6_ins_rt(nrt);
1558         }
1559 out:
1560         dst_release(&rt->u.dst);
1561 }
1562
1563 /*
1564  *      Misc support functions
1565  */
1566
1567 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1568 {
1569         struct rt6_info *rt = ip6_dst_alloc();
1570
1571         if (rt) {
1572                 rt->u.dst.input = ort->u.dst.input;
1573                 rt->u.dst.output = ort->u.dst.output;
1574
1575                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1576                 rt->u.dst.error = ort->u.dst.error;
1577                 rt->u.dst.dev = ort->u.dst.dev;
1578                 if (rt->u.dst.dev)
1579                         dev_hold(rt->u.dst.dev);
1580                 rt->rt6i_idev = ort->rt6i_idev;
1581                 if (rt->rt6i_idev)
1582                         in6_dev_hold(rt->rt6i_idev);
1583                 rt->u.dst.lastuse = jiffies;
1584                 rt->rt6i_expires = 0;
1585
1586                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1587                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1588                 rt->rt6i_metric = 0;
1589
1590                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1591 #ifdef CONFIG_IPV6_SUBTREES
1592                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1593 #endif
1594                 rt->rt6i_table = ort->rt6i_table;
1595         }
1596         return rt;
1597 }
1598
1599 #ifdef CONFIG_IPV6_ROUTE_INFO
1600 static struct rt6_info *rt6_get_route_info(struct net *net,
1601                                            struct in6_addr *prefix, int prefixlen,
1602                                            struct in6_addr *gwaddr, int ifindex)
1603 {
1604         struct fib6_node *fn;
1605         struct rt6_info *rt = NULL;
1606         struct fib6_table *table;
1607
1608         table = fib6_get_table(net, RT6_TABLE_INFO);
1609         if (table == NULL)
1610                 return NULL;
1611
1612         write_lock_bh(&table->tb6_lock);
1613         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1614         if (!fn)
1615                 goto out;
1616
1617         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1618                 if (rt->rt6i_dev->ifindex != ifindex)
1619                         continue;
1620                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1621                         continue;
1622                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1623                         continue;
1624                 dst_hold(&rt->u.dst);
1625                 break;
1626         }
1627 out:
1628         write_unlock_bh(&table->tb6_lock);
1629         return rt;
1630 }
1631
1632 static struct rt6_info *rt6_add_route_info(struct net *net,
1633                                            struct in6_addr *prefix, int prefixlen,
1634                                            struct in6_addr *gwaddr, int ifindex,
1635                                            unsigned pref)
1636 {
1637         struct fib6_config cfg = {
1638                 .fc_table       = RT6_TABLE_INFO,
1639                 .fc_metric      = IP6_RT_PRIO_USER,
1640                 .fc_ifindex     = ifindex,
1641                 .fc_dst_len     = prefixlen,
1642                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1643                                   RTF_UP | RTF_PREF(pref),
1644                 .fc_nlinfo.pid = 0,
1645                 .fc_nlinfo.nlh = NULL,
1646                 .fc_nlinfo.nl_net = net,
1647         };
1648
1649         ipv6_addr_copy(&cfg.fc_dst, prefix);
1650         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1651
1652         /* We should treat it as a default route if prefix length is 0. */
1653         if (!prefixlen)
1654                 cfg.fc_flags |= RTF_DEFAULT;
1655
1656         ip6_route_add(&cfg);
1657
1658         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1659 }
1660 #endif
1661
1662 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1663 {
1664         struct rt6_info *rt;
1665         struct fib6_table *table;
1666
1667         table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
1668         if (table == NULL)
1669                 return NULL;
1670
1671         write_lock_bh(&table->tb6_lock);
1672         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1673                 if (dev == rt->rt6i_dev &&
1674                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1675                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1676                         break;
1677         }
1678         if (rt)
1679                 dst_hold(&rt->u.dst);
1680         write_unlock_bh(&table->tb6_lock);
1681         return rt;
1682 }
1683
1684 EXPORT_SYMBOL(rt6_get_dflt_router);
1685
1686 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1687                                      struct net_device *dev,
1688                                      unsigned int pref)
1689 {
1690         struct fib6_config cfg = {
1691                 .fc_table       = RT6_TABLE_DFLT,
1692                 .fc_metric      = IP6_RT_PRIO_USER,
1693                 .fc_ifindex     = dev->ifindex,
1694                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1695                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1696                 .fc_nlinfo.pid = 0,
1697                 .fc_nlinfo.nlh = NULL,
1698                 .fc_nlinfo.nl_net = dev->nd_net,
1699         };
1700
1701         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1702
1703         ip6_route_add(&cfg);
1704
1705         return rt6_get_dflt_router(gwaddr, dev);
1706 }
1707
1708 void rt6_purge_dflt_routers(struct net *net)
1709 {
1710         struct rt6_info *rt;
1711         struct fib6_table *table;
1712
1713         /* NOTE: Keep consistent with rt6_get_dflt_router */
1714         table = fib6_get_table(net, RT6_TABLE_DFLT);
1715         if (table == NULL)
1716                 return;
1717
1718 restart:
1719         read_lock_bh(&table->tb6_lock);
1720         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1721                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1722                         dst_hold(&rt->u.dst);
1723                         read_unlock_bh(&table->tb6_lock);
1724                         ip6_del_rt(rt);
1725                         goto restart;
1726                 }
1727         }
1728         read_unlock_bh(&table->tb6_lock);
1729 }
1730
1731 static void rtmsg_to_fib6_config(struct net *net,
1732                                  struct in6_rtmsg *rtmsg,
1733                                  struct fib6_config *cfg)
1734 {
1735         memset(cfg, 0, sizeof(*cfg));
1736
1737         cfg->fc_table = RT6_TABLE_MAIN;
1738         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1739         cfg->fc_metric = rtmsg->rtmsg_metric;
1740         cfg->fc_expires = rtmsg->rtmsg_info;
1741         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1742         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1743         cfg->fc_flags = rtmsg->rtmsg_flags;
1744
1745         cfg->fc_nlinfo.nl_net = net;
1746
1747         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1748         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1749         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1750 }
1751
1752 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1753 {
1754         struct fib6_config cfg;
1755         struct in6_rtmsg rtmsg;
1756         int err;
1757
1758         switch(cmd) {
1759         case SIOCADDRT:         /* Add a route */
1760         case SIOCDELRT:         /* Delete a route */
1761                 if (!capable(CAP_NET_ADMIN))
1762                         return -EPERM;
1763                 err = copy_from_user(&rtmsg, arg,
1764                                      sizeof(struct in6_rtmsg));
1765                 if (err)
1766                         return -EFAULT;
1767
1768                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1769
1770                 rtnl_lock();
1771                 switch (cmd) {
1772                 case SIOCADDRT:
1773                         err = ip6_route_add(&cfg);
1774                         break;
1775                 case SIOCDELRT:
1776                         err = ip6_route_del(&cfg);
1777                         break;
1778                 default:
1779                         err = -EINVAL;
1780                 }
1781                 rtnl_unlock();
1782
1783                 return err;
1784         }
1785
1786         return -EINVAL;
1787 }
1788
1789 /*
1790  *      Drop the packet on the floor
1791  */
1792
1793 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1794 {
1795         int type;
1796         switch (ipstats_mib_noroutes) {
1797         case IPSTATS_MIB_INNOROUTES:
1798                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1799                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1800                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1801                         break;
1802                 }
1803                 /* FALLTHROUGH */
1804         case IPSTATS_MIB_OUTNOROUTES:
1805                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1806                 break;
1807         }
1808         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1809         kfree_skb(skb);
1810         return 0;
1811 }
1812
1813 static int ip6_pkt_discard(struct sk_buff *skb)
1814 {
1815         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1816 }
1817
1818 static int ip6_pkt_discard_out(struct sk_buff *skb)
1819 {
1820         skb->dev = skb->dst->dev;
1821         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1822 }
1823
1824 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1825
1826 static int ip6_pkt_prohibit(struct sk_buff *skb)
1827 {
1828         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1829 }
1830
1831 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1832 {
1833         skb->dev = skb->dst->dev;
1834         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1835 }
1836
1837 #endif
1838
1839 /*
1840  *      Allocate a dst for local (unicast / anycast) address.
1841  */
1842
1843 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1844                                     const struct in6_addr *addr,
1845                                     int anycast)
1846 {
1847         struct net *net = idev->dev->nd_net;
1848         struct rt6_info *rt = ip6_dst_alloc();
1849
1850         if (rt == NULL)
1851                 return ERR_PTR(-ENOMEM);
1852
1853         dev_hold(net->loopback_dev);
1854         in6_dev_hold(idev);
1855
1856         rt->u.dst.flags = DST_HOST;
1857         rt->u.dst.input = ip6_input;
1858         rt->u.dst.output = ip6_output;
1859         rt->rt6i_dev = net->loopback_dev;
1860         rt->rt6i_idev = idev;
1861         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1862         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1863         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1864         rt->u.dst.obsolete = -1;
1865
1866         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1867         if (anycast)
1868                 rt->rt6i_flags |= RTF_ANYCAST;
1869         else
1870                 rt->rt6i_flags |= RTF_LOCAL;
1871         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1872         if (rt->rt6i_nexthop == NULL) {
1873                 dst_free(&rt->u.dst);
1874                 return ERR_PTR(-ENOMEM);
1875         }
1876
1877         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1878         rt->rt6i_dst.plen = 128;
1879         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1880
1881         atomic_set(&rt->u.dst.__refcnt, 1);
1882
1883         return rt;
1884 }
1885
1886 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1887 {
1888         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1889             rt != &ip6_null_entry) {
1890                 RT6_TRACE("deleted by ifdown %p\n", rt);
1891                 return -1;
1892         }
1893         return 0;
1894 }
1895
1896 void rt6_ifdown(struct net *net, struct net_device *dev)
1897 {
1898         fib6_clean_all(net, fib6_ifdown, 0, dev);
1899 }
1900
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
1906
1907 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1908 {
1909         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1910         struct inet6_dev *idev;
1911         struct net *net = arg->dev->nd_net;
1912
1913         /* In IPv6 pmtu discovery is not optional,
1914            so that RTAX_MTU lock cannot disable it.
1915            We still use this lock to block changes
1916            caused by addrconf/ndisc.
1917         */
1918
1919         idev = __in6_dev_get(arg->dev);
1920         if (idev == NULL)
1921                 return 0;
1922
1923         /* For administrative MTU increase, there is no way to discover
1924            IPv6 PMTU increase, so PMTU increase should be updated here.
1925            Since RFC 1981 doesn't include administrative MTU increase
1926            update PMTU increase is a MUST. (i.e. jumbo frame)
1927          */
1928         /*
1929            If new MTU is less than route PMTU, this new MTU will be the
1930            lowest MTU in the path, update the route PMTU to reflect PMTU
1931            decreases; if new MTU is greater than route PMTU, and the
1932            old MTU is the lowest MTU in the path, update the route PMTU
1933            to reflect the increase. In this case if the other nodes' MTU
1934            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1935            PMTU discouvery.
1936          */
1937         if (rt->rt6i_dev == arg->dev &&
1938             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1939             (dst_mtu(&rt->u.dst) >= arg->mtu ||
1940              (dst_mtu(&rt->u.dst) < arg->mtu &&
1941               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1942                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1943                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1944         }
1945         return 0;
1946 }
1947
1948 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1949 {
1950         struct rt6_mtu_change_arg arg = {
1951                 .dev = dev,
1952                 .mtu = mtu,
1953         };
1954
1955         fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1956 }
1957
1958 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1959         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1960         [RTA_OIF]               = { .type = NLA_U32 },
1961         [RTA_IIF]               = { .type = NLA_U32 },
1962         [RTA_PRIORITY]          = { .type = NLA_U32 },
1963         [RTA_METRICS]           = { .type = NLA_NESTED },
1964 };
1965
1966 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1967                               struct fib6_config *cfg)
1968 {
1969         struct rtmsg *rtm;
1970         struct nlattr *tb[RTA_MAX+1];
1971         int err;
1972
1973         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1974         if (err < 0)
1975                 goto errout;
1976
1977         err = -EINVAL;
1978         rtm = nlmsg_data(nlh);
1979         memset(cfg, 0, sizeof(*cfg));
1980
1981         cfg->fc_table = rtm->rtm_table;
1982         cfg->fc_dst_len = rtm->rtm_dst_len;
1983         cfg->fc_src_len = rtm->rtm_src_len;
1984         cfg->fc_flags = RTF_UP;
1985         cfg->fc_protocol = rtm->rtm_protocol;
1986
1987         if (rtm->rtm_type == RTN_UNREACHABLE)
1988                 cfg->fc_flags |= RTF_REJECT;
1989
1990         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1991         cfg->fc_nlinfo.nlh = nlh;
1992         cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
1993
1994         if (tb[RTA_GATEWAY]) {
1995                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1996                 cfg->fc_flags |= RTF_GATEWAY;
1997         }
1998
1999         if (tb[RTA_DST]) {
2000                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2001
2002                 if (nla_len(tb[RTA_DST]) < plen)
2003                         goto errout;
2004
2005                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2006         }
2007
2008         if (tb[RTA_SRC]) {
2009                 int plen = (rtm->rtm_src_len + 7) >> 3;
2010
2011                 if (nla_len(tb[RTA_SRC]) < plen)
2012                         goto errout;
2013
2014                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2015         }
2016
2017         if (tb[RTA_OIF])
2018                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2019
2020         if (tb[RTA_PRIORITY])
2021                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2022
2023         if (tb[RTA_METRICS]) {
2024                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2025                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2026         }
2027
2028         if (tb[RTA_TABLE])
2029                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2030
2031         err = 0;
2032 errout:
2033         return err;
2034 }
2035
2036 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2037 {
2038         struct fib6_config cfg;
2039         int err;
2040
2041         err = rtm_to_fib6_config(skb, nlh, &cfg);
2042         if (err < 0)
2043                 return err;
2044
2045         return ip6_route_del(&cfg);
2046 }
2047
2048 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2049 {
2050         struct fib6_config cfg;
2051         int err;
2052
2053         err = rtm_to_fib6_config(skb, nlh, &cfg);
2054         if (err < 0)
2055                 return err;
2056
2057         return ip6_route_add(&cfg);
2058 }
2059
2060 static inline size_t rt6_nlmsg_size(void)
2061 {
2062         return NLMSG_ALIGN(sizeof(struct rtmsg))
2063                + nla_total_size(16) /* RTA_SRC */
2064                + nla_total_size(16) /* RTA_DST */
2065                + nla_total_size(16) /* RTA_GATEWAY */
2066                + nla_total_size(16) /* RTA_PREFSRC */
2067                + nla_total_size(4) /* RTA_TABLE */
2068                + nla_total_size(4) /* RTA_IIF */
2069                + nla_total_size(4) /* RTA_OIF */
2070                + nla_total_size(4) /* RTA_PRIORITY */
2071                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2072                + nla_total_size(sizeof(struct rta_cacheinfo));
2073 }
2074
2075 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2076                          struct in6_addr *dst, struct in6_addr *src,
2077                          int iif, int type, u32 pid, u32 seq,
2078                          int prefix, unsigned int flags)
2079 {
2080         struct rtmsg *rtm;
2081         struct nlmsghdr *nlh;
2082         long expires;
2083         u32 table;
2084
2085         if (prefix) {   /* user wants prefix routes only */
2086                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2087                         /* success since this is not a prefix route */
2088                         return 1;
2089                 }
2090         }
2091
2092         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2093         if (nlh == NULL)
2094                 return -EMSGSIZE;
2095
2096         rtm = nlmsg_data(nlh);
2097         rtm->rtm_family = AF_INET6;
2098         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2099         rtm->rtm_src_len = rt->rt6i_src.plen;
2100         rtm->rtm_tos = 0;
2101         if (rt->rt6i_table)
2102                 table = rt->rt6i_table->tb6_id;
2103         else
2104                 table = RT6_TABLE_UNSPEC;
2105         rtm->rtm_table = table;
2106         NLA_PUT_U32(skb, RTA_TABLE, table);
2107         if (rt->rt6i_flags&RTF_REJECT)
2108                 rtm->rtm_type = RTN_UNREACHABLE;
2109         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2110                 rtm->rtm_type = RTN_LOCAL;
2111         else
2112                 rtm->rtm_type = RTN_UNICAST;
2113         rtm->rtm_flags = 0;
2114         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2115         rtm->rtm_protocol = rt->rt6i_protocol;
2116         if (rt->rt6i_flags&RTF_DYNAMIC)
2117                 rtm->rtm_protocol = RTPROT_REDIRECT;
2118         else if (rt->rt6i_flags & RTF_ADDRCONF)
2119                 rtm->rtm_protocol = RTPROT_KERNEL;
2120         else if (rt->rt6i_flags&RTF_DEFAULT)
2121                 rtm->rtm_protocol = RTPROT_RA;
2122
2123         if (rt->rt6i_flags&RTF_CACHE)
2124                 rtm->rtm_flags |= RTM_F_CLONED;
2125
2126         if (dst) {
2127                 NLA_PUT(skb, RTA_DST, 16, dst);
2128                 rtm->rtm_dst_len = 128;
2129         } else if (rtm->rtm_dst_len)
2130                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2131 #ifdef CONFIG_IPV6_SUBTREES
2132         if (src) {
2133                 NLA_PUT(skb, RTA_SRC, 16, src);
2134                 rtm->rtm_src_len = 128;
2135         } else if (rtm->rtm_src_len)
2136                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2137 #endif
2138         if (iif)
2139                 NLA_PUT_U32(skb, RTA_IIF, iif);
2140         else if (dst) {
2141                 struct in6_addr saddr_buf;
2142                 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2143                                        dst, &saddr_buf) == 0)
2144                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2145         }
2146
2147         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2148                 goto nla_put_failure;
2149
2150         if (rt->u.dst.neighbour)
2151                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2152
2153         if (rt->u.dst.dev)
2154                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2155
2156         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2157
2158         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2159         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2160                                expires, rt->u.dst.error) < 0)
2161                 goto nla_put_failure;
2162
2163         return nlmsg_end(skb, nlh);
2164
2165 nla_put_failure:
2166         nlmsg_cancel(skb, nlh);
2167         return -EMSGSIZE;
2168 }
2169
2170 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2171 {
2172         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2173         int prefix;
2174
2175         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2176                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2177                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2178         } else
2179                 prefix = 0;
2180
2181         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2182                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2183                      prefix, NLM_F_MULTI);
2184 }
2185
2186 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2187 {
2188         struct net *net = in_skb->sk->sk_net;
2189         struct nlattr *tb[RTA_MAX+1];
2190         struct rt6_info *rt;
2191         struct sk_buff *skb;
2192         struct rtmsg *rtm;
2193         struct flowi fl;
2194         int err, iif = 0;
2195
2196         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2197         if (err < 0)
2198                 goto errout;
2199
2200         err = -EINVAL;
2201         memset(&fl, 0, sizeof(fl));
2202
2203         if (tb[RTA_SRC]) {
2204                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2205                         goto errout;
2206
2207                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2208         }
2209
2210         if (tb[RTA_DST]) {
2211                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2212                         goto errout;
2213
2214                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2215         }
2216
2217         if (tb[RTA_IIF])
2218                 iif = nla_get_u32(tb[RTA_IIF]);
2219
2220         if (tb[RTA_OIF])
2221                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2222
2223         if (iif) {
2224                 struct net_device *dev;
2225                 dev = __dev_get_by_index(net, iif);
2226                 if (!dev) {
2227                         err = -ENODEV;
2228                         goto errout;
2229                 }
2230         }
2231
2232         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2233         if (skb == NULL) {
2234                 err = -ENOBUFS;
2235                 goto errout;
2236         }
2237
2238         /* Reserve room for dummy headers, this skb can pass
2239            through good chunk of routing engine.
2240          */
2241         skb_reset_mac_header(skb);
2242         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2243
2244         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2245         skb->dst = &rt->u.dst;
2246
2247         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2248                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2249                             nlh->nlmsg_seq, 0, 0);
2250         if (err < 0) {
2251                 kfree_skb(skb);
2252                 goto errout;
2253         }
2254
2255         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2256 errout:
2257         return err;
2258 }
2259
2260 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2261 {
2262         struct sk_buff *skb;
2263         struct net *net = info->nl_net;
2264         u32 seq;
2265         int err;
2266
2267         err = -ENOBUFS;
2268         seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2269
2270         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2271         if (skb == NULL)
2272                 goto errout;
2273
2274         err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2275                                 event, info->pid, seq, 0, 0);
2276         if (err < 0) {
2277                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2278                 WARN_ON(err == -EMSGSIZE);
2279                 kfree_skb(skb);
2280                 goto errout;
2281         }
2282         err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2283                           info->nlh, gfp_any());
2284 errout:
2285         if (err < 0)
2286                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2287 }
2288
2289 /*
2290  *      /proc
2291  */
2292
2293 #ifdef CONFIG_PROC_FS
2294
/* Sum of the field widths of one route line. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Buffer bookkeeping for a /proc route listing. */
struct rt6_proc_arg {
        char *buffer;
        int offset;
        int length;
        int skip;
        int len;
};
2305
2306 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2307 {
2308         struct seq_file *m = p_arg;
2309
2310         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2311                    rt->rt6i_dst.plen);
2312
2313 #ifdef CONFIG_IPV6_SUBTREES
2314         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2315                    rt->rt6i_src.plen);
2316 #else
2317         seq_puts(m, "00000000000000000000000000000000 00 ");
2318 #endif
2319
2320         if (rt->rt6i_nexthop) {
2321                 seq_printf(m, NIP6_SEQFMT,
2322                            NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2323         } else {
2324                 seq_puts(m, "00000000000000000000000000000000");
2325         }
2326         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2327                    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2328                    rt->u.dst.__use, rt->rt6i_flags,
2329                    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2330         return 0;
2331 }
2332
2333 static int ipv6_route_show(struct seq_file *m, void *v)
2334 {
2335         struct net *net = (struct net *)m->private;
2336         fib6_clean_all(net, rt6_info_route, 0, m);
2337         return 0;
2338 }
2339
2340 static int ipv6_route_open(struct inode *inode, struct file *file)
2341 {
2342         struct net *net = get_proc_net(inode);
2343         if (!net)
2344                 return -ENXIO;
2345         return single_open(file, ipv6_route_show, net);
2346 }
2347
2348 static int ipv6_route_release(struct inode *inode, struct file *file)
2349 {
2350         struct seq_file *seq = file->private_data;
2351         struct net *net = seq->private;
2352         put_net(net);
2353         return single_release(inode, file);
2354 }
2355
2356 static const struct file_operations ipv6_route_proc_fops = {
2357         .owner          = THIS_MODULE,
2358         .open           = ipv6_route_open,
2359         .read           = seq_read,
2360         .llseek         = seq_lseek,
2361         .release        = ipv6_route_release,
2362 };
2363
2364 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2365 {
2366         struct net *net = (struct net *)seq->private;
2367         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2368                    net->ipv6.rt6_stats->fib_nodes,
2369                    net->ipv6.rt6_stats->fib_route_nodes,
2370                    net->ipv6.rt6_stats->fib_rt_alloc,
2371                    net->ipv6.rt6_stats->fib_rt_entries,
2372                    net->ipv6.rt6_stats->fib_rt_cache,
2373                    atomic_read(&ip6_dst_ops.entries),
2374                    net->ipv6.rt6_stats->fib_discarded_routes);
2375
2376         return 0;
2377 }
2378
2379 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2380 {
2381         struct net *net = get_proc_net(inode);
2382         return single_open(file, rt6_stats_seq_show, net);
2383 }
2384
2385 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2386 {
2387         struct seq_file *seq = file->private_data;
2388         struct net *net = (struct net *)seq->private;
2389         put_net(net);
2390         return single_release(inode, file);
2391 }
2392
2393 static const struct file_operations rt6_stats_seq_fops = {
2394         .owner   = THIS_MODULE,
2395         .open    = rt6_stats_seq_open,
2396         .read    = seq_read,
2397         .llseek  = seq_lseek,
2398         .release = rt6_stats_seq_release,
2399 };
2400 #endif  /* CONFIG_PROC_FS */
2401
2402 #ifdef CONFIG_SYSCTL
2403
2404 static
2405 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2406                               void __user *buffer, size_t *lenp, loff_t *ppos)
2407 {
2408         struct net *net = current->nsproxy->net_ns;
2409         int delay = net->ipv6.sysctl.flush_delay;
2410         if (write) {
2411                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2412                 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2413                 return 0;
2414         } else
2415                 return -EINVAL;
2416 }
2417
2418 ctl_table ipv6_route_table_template[] = {
2419         {
2420                 .procname       =       "flush",
2421                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2422                 .maxlen         =       sizeof(int),
2423                 .mode           =       0200,
2424                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2425         },
2426         {
2427                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2428                 .procname       =       "gc_thresh",
2429                 .data           =       &ip6_dst_ops.gc_thresh,
2430                 .maxlen         =       sizeof(int),
2431                 .mode           =       0644,
2432                 .proc_handler   =       &proc_dointvec,
2433         },
2434         {
2435                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2436                 .procname       =       "max_size",
2437                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2438                 .maxlen         =       sizeof(int),
2439                 .mode           =       0644,
2440                 .proc_handler   =       &proc_dointvec,
2441         },
2442         {
2443                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2444                 .procname       =       "gc_min_interval",
2445                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2446                 .maxlen         =       sizeof(int),
2447                 .mode           =       0644,
2448                 .proc_handler   =       &proc_dointvec_jiffies,
2449                 .strategy       =       &sysctl_jiffies,
2450         },
2451         {
2452                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2453                 .procname       =       "gc_timeout",
2454                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2455                 .maxlen         =       sizeof(int),
2456                 .mode           =       0644,
2457                 .proc_handler   =       &proc_dointvec_jiffies,
2458                 .strategy       =       &sysctl_jiffies,
2459         },
2460         {
2461                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2462                 .procname       =       "gc_interval",
2463                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2464                 .maxlen         =       sizeof(int),
2465                 .mode           =       0644,
2466                 .proc_handler   =       &proc_dointvec_jiffies,
2467                 .strategy       =       &sysctl_jiffies,
2468         },
2469         {
2470                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2471                 .procname       =       "gc_elasticity",
2472                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2473                 .maxlen         =       sizeof(int),
2474                 .mode           =       0644,
2475                 .proc_handler   =       &proc_dointvec_jiffies,
2476                 .strategy       =       &sysctl_jiffies,
2477         },
2478         {
2479                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2480                 .procname       =       "mtu_expires",
2481                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2482                 .maxlen         =       sizeof(int),
2483                 .mode           =       0644,
2484                 .proc_handler   =       &proc_dointvec_jiffies,
2485                 .strategy       =       &sysctl_jiffies,
2486         },
2487         {
2488                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2489                 .procname       =       "min_adv_mss",
2490                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2491                 .maxlen         =       sizeof(int),
2492                 .mode           =       0644,
2493                 .proc_handler   =       &proc_dointvec_jiffies,
2494                 .strategy       =       &sysctl_jiffies,
2495         },
2496         {
2497                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2498                 .procname       =       "gc_min_interval_ms",
2499                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2500                 .maxlen         =       sizeof(int),
2501                 .mode           =       0644,
2502                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2503                 .strategy       =       &sysctl_ms_jiffies,
2504         },
2505         { .ctl_name = 0 }
2506 };
2507
2508 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2509 {
2510         struct ctl_table *table;
2511
2512         table = kmemdup(ipv6_route_table_template,
2513                         sizeof(ipv6_route_table_template),
2514                         GFP_KERNEL);
2515
2516         if (table) {
2517                 table[0].data = &net->ipv6.sysctl.flush_delay;
2518                 /* table[1].data will be handled when we have
2519                    routes per namespace */
2520                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2521                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2522                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2523                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2524                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2525                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2526                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2527         }
2528
2529         return table;
2530 }
2531 #endif
2532
/*
 * Per-namespace init: with CONFIG_PROC_FS, create the "ipv6_route" and
 * "rt6_stats" proc entries for @net.  Always returns 0.
 */
static int ip6_route_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_fops_create(net, "ipv6_route", 0,
                             &ipv6_route_proc_fops);
        proc_net_fops_create(net, "rt6_stats", S_IRUGO,
                             &rt6_stats_seq_fops);
#endif
        return 0;
}
2541
2542 static void ip6_route_net_exit(struct net *net)
2543 {
2544 #ifdef CONFIG_PROC_FS
2545         proc_net_remove(net, "ipv6_route");
2546         proc_net_remove(net, "rt6_stats");
2547 #endif
2548         rt6_ifdown(net, NULL);
2549 }
2550
2551 static struct pernet_operations ip6_route_net_ops = {
2552         .init = ip6_route_net_init,
2553         .exit = ip6_route_net_exit,
2554 };
2555
2556 int __init ip6_route_init(void)
2557 {
2558         int ret;
2559
2560         ip6_dst_ops.kmem_cachep =
2561                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2562                                   SLAB_HWCACHE_ALIGN, NULL);
2563         if (!ip6_dst_ops.kmem_cachep)
2564                 return -ENOMEM;
2565
2566         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2567
2568         ret = fib6_init();
2569         if (ret)
2570                 goto out_kmem_cache;
2571
2572         ret = xfrm6_init();
2573         if (ret)
2574                 goto out_fib6_init;
2575
2576         ret = fib6_rules_init();
2577         if (ret)
2578                 goto xfrm6_init;
2579
2580         ret = -ENOBUFS;
2581         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2582             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2583             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2584                 goto fib6_rules_init;
2585
2586         ret = register_pernet_subsys(&ip6_route_net_ops);
2587         if (ret)
2588                 goto fib6_rules_init;
2589 out:
2590         return ret;
2591
2592 fib6_rules_init:
2593         fib6_rules_cleanup();
2594 xfrm6_init:
2595         xfrm6_fini();
2596 out_fib6_init:
2597         fib6_gc_cleanup();
2598 out_kmem_cache:
2599         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2600         goto out;
2601 }
2602
2603 void ip6_route_cleanup(void)
2604 {
2605         unregister_pernet_subsys(&ip6_route_net_ops);
2606         fib6_rules_cleanup();
2607         xfrm6_fini();
2608         fib6_gc_cleanup();
2609         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2610 }