]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
5bdd9d4010fea7de5fbd9190ea3aa6254719e87d
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
/* Debug verbosity; raise to 3 (or more) to compile in RDBG()/RT6_TRACE(). */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing observable. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG takes a parenthesised printk argument list: RDBG(("fmt", args)). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the policy lookup clones routes that already have a
 * nexthop via rt6_alloc_clone(); when zero such routes are returned as-is.
 */
#define CLONE_OFFLINK_ROUTE 0
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103                                            struct in6_addr *gwaddr, int ifindex,
104                                            unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106                                            struct in6_addr *gwaddr, int ifindex);
107 #endif
108
109 static struct dst_ops ip6_dst_ops = {
110         .family                 =       AF_INET6,
111         .protocol               =       __constant_htons(ETH_P_IPV6),
112         .gc                     =       ip6_dst_gc,
113         .gc_thresh              =       1024,
114         .check                  =       ip6_dst_check,
115         .destroy                =       ip6_dst_destroy,
116         .ifdown                 =       ip6_dst_ifdown,
117         .negative_advice        =       ip6_negative_advice,
118         .link_failure           =       ip6_link_failure,
119         .update_pmtu            =       ip6_rt_update_pmtu,
120         .entry_size             =       sizeof(struct rt6_info),
121 };
122
123 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124 {
125 }
126
127 static struct dst_ops ip6_dst_blackhole_ops = {
128         .family                 =       AF_INET6,
129         .protocol               =       __constant_htons(ETH_P_IPV6),
130         .destroy                =       ip6_dst_destroy,
131         .check                  =       ip6_dst_check,
132         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
133         .entry_size             =       sizeof(struct rt6_info),
134 };
135
136 struct rt6_info ip6_null_entry = {
137         .u = {
138                 .dst = {
139                         .__refcnt       = ATOMIC_INIT(1),
140                         .__use          = 1,
141                         .dev            = &loopback_dev,
142                         .obsolete       = -1,
143                         .error          = -ENETUNREACH,
144                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
145                         .input          = ip6_pkt_discard,
146                         .output         = ip6_pkt_discard_out,
147                         .ops            = &ip6_dst_ops,
148                         .path           = (struct dst_entry*)&ip6_null_entry,
149                 }
150         },
151         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
152         .rt6i_metric    = ~(u32) 0,
153         .rt6i_ref       = ATOMIC_INIT(1),
154 };
155
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Packet handlers for the two extra static entries below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/* Static reject entry whose dst reports -EACCES. */
struct rt6_info ip6_prohibit_entry = {
        .u = {
                .dst = {
                        .__refcnt = ATOMIC_INIT(1),
                        .__use = 1,
                        .dev = &loopback_dev,
                        .obsolete = -1,
                        .error = -EACCES,
                        .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
                        .input = ip6_pkt_prohibit,
                        .output = ip6_pkt_prohibit_out,
                        .ops = &ip6_dst_ops,
                        .path = (struct dst_entry*)&ip6_prohibit_entry,
                }
        },
        .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
        .rt6i_metric = ~(u32) 0,
        .rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Static reject entry whose dst reports -EINVAL; the same handler is used
 * for both the input and the output path.
 */
struct rt6_info ip6_blk_hole_entry = {
        .u = {
                .dst = {
                        .__refcnt = ATOMIC_INIT(1),
                        .__use = 1,
                        .dev = &loopback_dev,
                        .obsolete = -1,
                        .error = -EINVAL,
                        .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
                        .input = ip6_pkt_blk_hole,
                        .output = ip6_pkt_blk_hole,
                        .ops = &ip6_dst_ops,
                        .path = (struct dst_entry*)&ip6_blk_hole_entry,
                }
        },
        .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
        .rt6i_metric = ~(u32) 0,
        .rt6i_ref = ATOMIC_INIT(1),
};

#endif
203
204 /* allocate dst with ip6_dst_ops */
205 static __inline__ struct rt6_info *ip6_dst_alloc(void)
206 {
207         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
208 }
209
210 static void ip6_dst_destroy(struct dst_entry *dst)
211 {
212         struct rt6_info *rt = (struct rt6_info *)dst;
213         struct inet6_dev *idev = rt->rt6i_idev;
214
215         if (idev != NULL) {
216                 rt->rt6i_idev = NULL;
217                 in6_dev_put(idev);
218         }
219 }
220
221 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
222                            int how)
223 {
224         struct rt6_info *rt = (struct rt6_info *)dst;
225         struct inet6_dev *idev = rt->rt6i_idev;
226
227         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
228                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
229                 if (loopback_idev != NULL) {
230                         rt->rt6i_idev = loopback_idev;
231                         in6_dev_put(idev);
232                 }
233         }
234 }
235
236 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
237 {
238         return (rt->rt6i_flags & RTF_EXPIRES &&
239                 time_after(jiffies, rt->rt6i_expires));
240 }
241
242 static inline int rt6_need_strict(struct in6_addr *daddr)
243 {
244         return (ipv6_addr_type(daddr) &
245                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
246 }
247
248 /*
249  *      Route lookup. Any table->tb6_lock is implied.
250  */
251
252 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
253                                                     int oif,
254                                                     int strict)
255 {
256         struct rt6_info *local = NULL;
257         struct rt6_info *sprt;
258
259         if (oif) {
260                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
261                         struct net_device *dev = sprt->rt6i_dev;
262                         if (dev->ifindex == oif)
263                                 return sprt;
264                         if (dev->flags & IFF_LOOPBACK) {
265                                 if (sprt->rt6i_idev == NULL ||
266                                     sprt->rt6i_idev->dev->ifindex != oif) {
267                                         if (strict && oif)
268                                                 continue;
269                                         if (local && (!oif ||
270                                                       local->rt6i_idev->dev->ifindex == oif))
271                                                 continue;
272                                 }
273                                 local = sprt;
274                         }
275                 }
276
277                 if (local)
278                         return local;
279
280                 if (strict)
281                         return &ip6_null_entry;
282         }
283         return rt;
284 }
285
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not in
 * a NUD_VALID state and the per-device rtr_probe_interval has elapsed
 * since neigh->updated, send a Neighbour Solicitation to the neighbour's
 * solicited-node multicast address.  A NULL @rt, or a route without a
 * nexthop, is ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The check of neigh->updated and its refresh are therefore done
 * under the write side of neigh->lock: with only the read side held, two
 * CPUs could both pass the interval test and both send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
        struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
        struct in6_addr mcaddr;
        struct in6_addr *target;

        /* Cheap unlocked pre-check; repeated under the lock below. */
        if (!neigh || (neigh->nud_state & NUD_VALID))
                return;

        write_lock_bh(&neigh->lock);
        if ((neigh->nud_state & NUD_VALID) ||
            !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
                write_unlock_bh(&neigh->lock);
                return;
        }
        neigh->updated = jiffies;
        write_unlock_bh(&neigh->lock);

        /* The solicitation itself is sent without holding neigh->lock. */
        target = (struct in6_addr *)&neigh->primary_key;
        addrconf_addr_solict_mult(target, &mcaddr);
        ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
        return;
}
#endif
321
322 /*
323  * Default Router Selection (RFC 2461 6.3.6)
324  */
325 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
326 {
327         struct net_device *dev = rt->rt6i_dev;
328         if (!oif || dev->ifindex == oif)
329                 return 2;
330         if ((dev->flags & IFF_LOOPBACK) &&
331             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332                 return 1;
333         return 0;
334 }
335
336 static inline int rt6_check_neigh(struct rt6_info *rt)
337 {
338         struct neighbour *neigh = rt->rt6i_nexthop;
339         int m = 0;
340         if (rt->rt6i_flags & RTF_NONEXTHOP ||
341             !(rt->rt6i_flags & RTF_GATEWAY))
342                 m = 1;
343         else if (neigh) {
344                 read_lock_bh(&neigh->lock);
345                 if (neigh->nud_state & NUD_VALID)
346                         m = 2;
347                 else if (!(neigh->nud_state & NUD_FAILED))
348                         m = 1;
349                 read_unlock_bh(&neigh->lock);
350         }
351         return m;
352 }
353
354 static int rt6_score_route(struct rt6_info *rt, int oif,
355                            int strict)
356 {
357         int m, n;
358
359         m = rt6_check_dev(rt, oif);
360         if (!m && (strict & RT6_LOOKUP_F_IFACE))
361                 return -1;
362 #ifdef CONFIG_IPV6_ROUTER_PREF
363         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
364 #endif
365         n = rt6_check_neigh(rt);
366         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
367                 return -1;
368         return m;
369 }
370
371 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
372                                    int *mpri, struct rt6_info *match)
373 {
374         int m;
375
376         if (rt6_check_expired(rt))
377                 goto out;
378
379         m = rt6_score_route(rt, oif, strict);
380         if (m < 0)
381                 goto out;
382
383         if (m > *mpri) {
384                 if (strict & RT6_LOOKUP_F_REACHABLE)
385                         rt6_probe(match);
386                 *mpri = m;
387                 match = rt;
388         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
389                 rt6_probe(rt);
390         }
391
392 out:
393         return match;
394 }
395
396 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
397                                      struct rt6_info *rr_head,
398                                      u32 metric, int oif, int strict)
399 {
400         struct rt6_info *rt, *match;
401         int mpri = -1;
402
403         match = NULL;
404         for (rt = rr_head; rt && rt->rt6i_metric == metric;
405              rt = rt->u.dst.rt6_next)
406                 match = find_match(rt, oif, strict, &mpri, match);
407         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
408              rt = rt->u.dst.rt6_next)
409                 match = find_match(rt, oif, strict, &mpri, match);
410
411         return match;
412 }
413
414 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
415 {
416         struct rt6_info *match, *rt0;
417
418         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
419                   __FUNCTION__, fn->leaf, oif);
420
421         rt0 = fn->rr_ptr;
422         if (!rt0)
423                 fn->rr_ptr = rt0 = fn->leaf;
424
425         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
426
427         if (!match &&
428             (strict & RT6_LOOKUP_F_REACHABLE)) {
429                 struct rt6_info *next = rt0->u.dst.rt6_next;
430
431                 /* no entries matched; do round-robin */
432                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
433                         next = fn->leaf;
434
435                 if (next != rt0)
436                         fn->rr_ptr = next;
437         }
438
439         RT6_TRACE("%s() => %p\n",
440                   __FUNCTION__, match);
441
442         return (match ? match : &ip6_null_entry);
443 }
444
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option received on @dev from router
 * @gwaddr.
 *
 * @opt/@len describe the raw option.  The option is rejected with
 * -EINVAL when it is shorter than struct route_info or when its length
 * field is inconsistent with prefix_len.  Otherwise the matching route
 * is deleted (lifetime 0), created, or has its preference refreshed, and
 * its expiry is set from the advertised lifetime (0xffffffff means no
 * expiry).  Returns 0 in all accepted cases.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  struct in6_addr *gwaddr)
{
        struct route_info *rinfo = (struct route_info *) opt;
        struct in6_addr prefix_buf, *prefix;
        unsigned int pref;
        u32 lifetime;
        struct rt6_info *rt;

        if (len < sizeof(struct route_info))
                return -EINVAL;

        /* Sanity check for prefix_len and length */
        if (rinfo->length > 3 || rinfo->prefix_len > 128)
                return -EINVAL;
        if (rinfo->prefix_len > 64 && rinfo->length < 2)
                return -EINVAL;
        if (rinfo->prefix_len > 0 && rinfo->length < 1)
                return -EINVAL;

        pref = rinfo->route_pref;
        if (pref == ICMPV6_ROUTER_PREF_INVALID)
                pref = ICMPV6_ROUTER_PREF_MEDIUM;

        /* 0xffffffff is "infinity"; anything else is clamped so that
         * HZ * lifetime below cannot overflow. */
        lifetime = ntohl(rinfo->lifetime);
        if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
                lifetime = 0x7fffffff/HZ - 1;

        if (rinfo->length == 3) {
                /* Full 128-bit prefix present in the option. */
                prefix = (struct in6_addr *)rinfo->prefix;
        } else {
                /* this function is safe */
                ipv6_addr_prefix(&prefix_buf,
                                 (struct in6_addr *)rinfo->prefix,
                                 rinfo->prefix_len);
                prefix = &prefix_buf;
        }

        rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

        if (rt) {
                if (lifetime) {
                        rt->rt6i_flags = RTF_ROUTEINFO |
                                         (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
                } else {
                        ip6_del_rt(rt);
                        rt = NULL;
                }
        } else if (lifetime) {
                rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
                                        pref);
        }

        if (!rt)
                return 0;

        if (lifetime == 0xffffffff) {
                rt->rt6i_flags &= ~RTF_EXPIRES;
        } else {
                rt->rt6i_expires = jiffies + HZ * lifetime;
                rt->rt6i_flags |= RTF_EXPIRES;
        }
        dst_release(&rt->u.dst);

        return 0;
}
#endif
522
/*
 * BACKTRACK(saddr) - climb the FIB tree after a failed selection.
 *
 * Not hygienic: it reads and writes the caller's local variables `rt` and
 * `fn` and jumps to the caller's `out` and `restart` labels.  It only
 * acts when rt == &ip6_null_entry.  It then walks towards the root,
 * descending into a parent's source subtree (looked up with saddr) when
 * there is one that we did not just come from, and jumps to `restart` at
 * the first node flagged RTN_RTINFO, or to `out` once the top-level root
 * is reached.
 */
#define BACKTRACK(saddr) \
do { \
        if (rt == &ip6_null_entry) { \
                struct fib6_node *pn; \
                for (;;) { \
                        if (fn->fn_flags & RTN_TL_ROOT) \
                                goto out; \
                        pn = fn->parent; \
                        fn = (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) ? \
                                fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr) : \
                                pn; \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while(0)
540
541 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542                                              struct flowi *fl, int flags)
543 {
544         struct fib6_node *fn;
545         struct rt6_info *rt;
546
547         read_lock_bh(&table->tb6_lock);
548         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549 restart:
550         rt = fn->leaf;
551         rt = rt6_device_match(rt, fl->oif, flags);
552         BACKTRACK(&fl->fl6_src);
553 out:
554         dst_hold(&rt->u.dst);
555         read_unlock_bh(&table->tb6_lock);
556
557         rt->u.dst.lastuse = jiffies;
558         rt->u.dst.__use++;
559
560         return rt;
561
562 }
563
564 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
565                             int oif, int strict)
566 {
567         struct flowi fl = {
568                 .oif = oif,
569                 .nl_u = {
570                         .ip6_u = {
571                                 .daddr = *daddr,
572                         },
573                 },
574         };
575         struct dst_entry *dst;
576         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
577
578         if (saddr) {
579                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580                 flags |= RT6_LOOKUP_F_HAS_SADDR;
581         }
582
583         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
584         if (dst->error == 0)
585                 return (struct rt6_info *) dst;
586
587         dst_release(dst);
588
589         return NULL;
590 }
591
592 EXPORT_SYMBOL(rt6_lookup);
593
594 /* ip6_ins_rt is called with FREE table->tb6_lock.
595    It takes new route entry, the addition fails by any reason the
596    route is freed. In any case, if caller does not hold it, it may
597    be destroyed.
598  */
599
600 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
601 {
602         int err;
603         struct fib6_table *table;
604
605         table = rt->rt6i_table;
606         write_lock_bh(&table->tb6_lock);
607         err = fib6_add(&table->tb6_root, rt, info);
608         write_unlock_bh(&table->tb6_lock);
609
610         return err;
611 }
612
613 int ip6_ins_rt(struct rt6_info *rt)
614 {
615         return __ip6_ins_rt(rt, NULL);
616 }
617
618 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619                                       struct in6_addr *saddr)
620 {
621         struct rt6_info *rt;
622
623         /*
624          *      Clone the route.
625          */
626
627         rt = ip6_rt_copy(ort);
628
629         if (rt) {
630                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631                         if (rt->rt6i_dst.plen != 128 &&
632                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633                                 rt->rt6i_flags |= RTF_ANYCAST;
634                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
635                 }
636
637                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
638                 rt->rt6i_dst.plen = 128;
639                 rt->rt6i_flags |= RTF_CACHE;
640                 rt->u.dst.flags |= DST_HOST;
641
642 #ifdef CONFIG_IPV6_SUBTREES
643                 if (rt->rt6i_src.plen && saddr) {
644                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645                         rt->rt6i_src.plen = 128;
646                 }
647 #endif
648
649                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
650
651         }
652
653         return rt;
654 }
655
656 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
657 {
658         struct rt6_info *rt = ip6_rt_copy(ort);
659         if (rt) {
660                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
661                 rt->rt6i_dst.plen = 128;
662                 rt->rt6i_flags |= RTF_CACHE;
663                 rt->u.dst.flags |= DST_HOST;
664                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
665         }
666         return rt;
667 }
668
669 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
670                                             struct flowi *fl, int flags)
671 {
672         struct fib6_node *fn;
673         struct rt6_info *rt, *nrt;
674         int strict = 0;
675         int attempts = 3;
676         int err;
677         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
678
679         strict |= flags & RT6_LOOKUP_F_IFACE;
680
681 relookup:
682         read_lock_bh(&table->tb6_lock);
683
684 restart_2:
685         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
686
687 restart:
688         rt = rt6_select(fn, fl->iif, strict | reachable);
689         BACKTRACK(&fl->fl6_src);
690         if (rt == &ip6_null_entry ||
691             rt->rt6i_flags & RTF_CACHE)
692                 goto out;
693
694         dst_hold(&rt->u.dst);
695         read_unlock_bh(&table->tb6_lock);
696
697         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
698                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
699         else {
700 #if CLONE_OFFLINK_ROUTE
701                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
702 #else
703                 goto out2;
704 #endif
705         }
706
707         dst_release(&rt->u.dst);
708         rt = nrt ? : &ip6_null_entry;
709
710         dst_hold(&rt->u.dst);
711         if (nrt) {
712                 err = ip6_ins_rt(nrt);
713                 if (!err)
714                         goto out2;
715         }
716
717         if (--attempts <= 0)
718                 goto out2;
719
720         /*
721          * Race condition! In the gap, when table->tb6_lock was
722          * released someone could insert this route.  Relookup.
723          */
724         dst_release(&rt->u.dst);
725         goto relookup;
726
727 out:
728         if (reachable) {
729                 reachable = 0;
730                 goto restart_2;
731         }
732         dst_hold(&rt->u.dst);
733         read_unlock_bh(&table->tb6_lock);
734 out2:
735         rt->u.dst.lastuse = jiffies;
736         rt->u.dst.__use++;
737
738         return rt;
739 }
740
741 void ip6_route_input(struct sk_buff *skb)
742 {
743         struct ipv6hdr *iph = ipv6_hdr(skb);
744         int flags = RT6_LOOKUP_F_HAS_SADDR;
745         struct flowi fl = {
746                 .iif = skb->dev->ifindex,
747                 .nl_u = {
748                         .ip6_u = {
749                                 .daddr = iph->daddr,
750                                 .saddr = iph->saddr,
751                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
752                         },
753                 },
754                 .mark = skb->mark,
755                 .proto = iph->nexthdr,
756         };
757
758         if (rt6_need_strict(&iph->daddr))
759                 flags |= RT6_LOOKUP_F_IFACE;
760
761         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
762 }
763
764 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
765                                              struct flowi *fl, int flags)
766 {
767         struct fib6_node *fn;
768         struct rt6_info *rt, *nrt;
769         int strict = 0;
770         int attempts = 3;
771         int err;
772         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
773
774         strict |= flags & RT6_LOOKUP_F_IFACE;
775
776 relookup:
777         read_lock_bh(&table->tb6_lock);
778
779 restart_2:
780         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
781
782 restart:
783         rt = rt6_select(fn, fl->oif, strict | reachable);
784         BACKTRACK(&fl->fl6_src);
785         if (rt == &ip6_null_entry ||
786             rt->rt6i_flags & RTF_CACHE)
787                 goto out;
788
789         dst_hold(&rt->u.dst);
790         read_unlock_bh(&table->tb6_lock);
791
792         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
793                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
794         else {
795 #if CLONE_OFFLINK_ROUTE
796                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
797 #else
798                 goto out2;
799 #endif
800         }
801
802         dst_release(&rt->u.dst);
803         rt = nrt ? : &ip6_null_entry;
804
805         dst_hold(&rt->u.dst);
806         if (nrt) {
807                 err = ip6_ins_rt(nrt);
808                 if (!err)
809                         goto out2;
810         }
811
812         if (--attempts <= 0)
813                 goto out2;
814
815         /*
816          * Race condition! In the gap, when table->tb6_lock was
817          * released someone could insert this route.  Relookup.
818          */
819         dst_release(&rt->u.dst);
820         goto relookup;
821
822 out:
823         if (reachable) {
824                 reachable = 0;
825                 goto restart_2;
826         }
827         dst_hold(&rt->u.dst);
828         read_unlock_bh(&table->tb6_lock);
829 out2:
830         rt->u.dst.lastuse = jiffies;
831         rt->u.dst.__use++;
832         return rt;
833 }
834
835 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
836 {
837         int flags = 0;
838
839         if (rt6_need_strict(&fl->fl6_dst))
840                 flags |= RT6_LOOKUP_F_IFACE;
841
842         if (!ipv6_addr_any(&fl->fl6_src))
843                 flags |= RT6_LOOKUP_F_HAS_SADDR;
844
845         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
846 }
847
848 EXPORT_SYMBOL(ip6_route_output);
849
/*
 * input/output handler for blackhole dsts: free the packet and report
 * success (0) to the caller.
 */
static int ip6_blackhole_output(struct sk_buff *skb)
{
        kfree_skb(skb);

        return 0;
}
855
856 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
857 {
858         struct rt6_info *ort = (struct rt6_info *) *dstp;
859         struct rt6_info *rt = (struct rt6_info *)
860                 dst_alloc(&ip6_dst_blackhole_ops);
861         struct dst_entry *new = NULL;
862
863         if (rt) {
864                 new = &rt->u.dst;
865
866                 atomic_set(&new->__refcnt, 1);
867                 new->__use = 1;
868                 new->input = ip6_blackhole_output;
869                 new->output = ip6_blackhole_output;
870
871                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
872                 new->dev = ort->u.dst.dev;
873                 if (new->dev)
874                         dev_hold(new->dev);
875                 rt->rt6i_idev = ort->rt6i_idev;
876                 if (rt->rt6i_idev)
877                         in6_dev_hold(rt->rt6i_idev);
878                 rt->rt6i_expires = 0;
879
880                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
881                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
882                 rt->rt6i_metric = 0;
883
884                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
885 #ifdef CONFIG_IPV6_SUBTREES
886                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
887 #endif
888
889                 dst_free(new);
890         }
891
892         dst_release(*dstp);
893         *dstp = new;
894         return (new ? 0 : -ENOMEM);
895 }
896 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
897
898 /*
899  *      Destination cache support functions
900  */
901
902 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
903 {
904         struct rt6_info *rt;
905
906         rt = (struct rt6_info *) dst;
907
908         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
909                 return dst;
910
911         return NULL;
912 }
913
914 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
915 {
916         struct rt6_info *rt = (struct rt6_info *) dst;
917
918         if (rt) {
919                 if (rt->rt6i_flags & RTF_CACHE)
920                         ip6_del_rt(rt);
921                 else
922                         dst_release(dst);
923         }
924         return NULL;
925 }
926
927 static void ip6_link_failure(struct sk_buff *skb)
928 {
929         struct rt6_info *rt;
930
931         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
932
933         rt = (struct rt6_info *) skb->dst;
934         if (rt) {
935                 if (rt->rt6i_flags&RTF_CACHE) {
936                         dst_set_expires(&rt->u.dst, 0);
937                         rt->rt6i_flags |= RTF_EXPIRES;
938                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
939                         rt->rt6i_node->fn_sernum = -1;
940         }
941 }
942
943 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
944 {
945         struct rt6_info *rt6 = (struct rt6_info*)dst;
946
947         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
948                 rt6->rt6i_flags |= RTF_MODIFIED;
949                 if (mtu < IPV6_MIN_MTU) {
950                         mtu = IPV6_MIN_MTU;
951                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
952                 }
953                 dst->metrics[RTAX_MTU-1] = mtu;
954                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
955         }
956 }
957
958 static int ipv6_get_mtu(struct net_device *dev);
959
960 static inline unsigned int ipv6_advmss(unsigned int mtu)
961 {
962         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
963
964         if (mtu < ip6_rt_min_advmss)
965                 mtu = ip6_rt_min_advmss;
966
967         /*
968          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
969          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
970          * IPV6_MAXPLEN is also valid and means: "any MSS,
971          * rely only on pmtu discovery"
972          */
973         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
974                 mtu = IPV6_MAXPLEN;
975         return mtu;
976 }
977
978 static struct dst_entry *ndisc_dst_gc_list;
979 static DEFINE_SPINLOCK(ndisc_lock);
980
981 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
982                                   struct neighbour *neigh,
983                                   struct in6_addr *addr,
984                                   int (*output)(struct sk_buff *))
985 {
986         struct rt6_info *rt;
987         struct inet6_dev *idev = in6_dev_get(dev);
988
989         if (unlikely(idev == NULL))
990                 return NULL;
991
992         rt = ip6_dst_alloc();
993         if (unlikely(rt == NULL)) {
994                 in6_dev_put(idev);
995                 goto out;
996         }
997
998         dev_hold(dev);
999         if (neigh)
1000                 neigh_hold(neigh);
1001         else
1002                 neigh = ndisc_get_neigh(dev, addr);
1003
1004         rt->rt6i_dev      = dev;
1005         rt->rt6i_idev     = idev;
1006         rt->rt6i_nexthop  = neigh;
1007         atomic_set(&rt->u.dst.__refcnt, 1);
1008         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1009         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1010         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1011         rt->u.dst.output  = output;
1012
1013 #if 0   /* there's no chance to use these for ndisc */
1014         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1015                                 ? DST_HOST
1016                                 : 0;
1017         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1018         rt->rt6i_dst.plen = 128;
1019 #endif
1020
1021         spin_lock_bh(&ndisc_lock);
1022         rt->u.dst.next = ndisc_dst_gc_list;
1023         ndisc_dst_gc_list = &rt->u.dst;
1024         spin_unlock_bh(&ndisc_lock);
1025
1026         fib6_force_start_gc();
1027
1028 out:
1029         return &rt->u.dst;
1030 }
1031
1032 int ndisc_dst_gc(int *more)
1033 {
1034         struct dst_entry *dst, *next, **pprev;
1035         int freed;
1036
1037         next = NULL;
1038         freed = 0;
1039
1040         spin_lock_bh(&ndisc_lock);
1041         pprev = &ndisc_dst_gc_list;
1042
1043         while ((dst = *pprev) != NULL) {
1044                 if (!atomic_read(&dst->__refcnt)) {
1045                         *pprev = dst->next;
1046                         dst_free(dst);
1047                         freed++;
1048                 } else {
1049                         pprev = &dst->next;
1050                         (*more)++;
1051                 }
1052         }
1053
1054         spin_unlock_bh(&ndisc_lock);
1055
1056         return freed;
1057 }
1058
1059 static int ip6_dst_gc(void)
1060 {
1061         static unsigned expire = 30*HZ;
1062         static unsigned long last_gc;
1063         unsigned long now = jiffies;
1064
1065         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1066             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1067                 goto out;
1068
1069         expire++;
1070         fib6_run_gc(expire);
1071         last_gc = now;
1072         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1073                 expire = ip6_rt_gc_timeout>>1;
1074
1075 out:
1076         expire -= expire>>ip6_rt_gc_elasticity;
1077         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1078 }
1079
1080 /* Clean host part of a prefix. Not necessary in radix tree,
1081    but results in cleaner routing tables.
1082
1083    Remove it only when all the things will work!
1084  */
1085
1086 static int ipv6_get_mtu(struct net_device *dev)
1087 {
1088         int mtu = IPV6_MIN_MTU;
1089         struct inet6_dev *idev;
1090
1091         idev = in6_dev_get(dev);
1092         if (idev) {
1093                 mtu = idev->cnf.mtu6;
1094                 in6_dev_put(idev);
1095         }
1096         return mtu;
1097 }
1098
1099 int ipv6_get_hoplimit(struct net_device *dev)
1100 {
1101         int hoplimit = ipv6_devconf.hop_limit;
1102         struct inet6_dev *idev;
1103
1104         idev = in6_dev_get(dev);
1105         if (idev) {
1106                 hoplimit = idev->cnf.hop_limit;
1107                 in6_dev_put(idev);
1108         }
1109         return hoplimit;
1110 }
1111
1112 /*
1113  *
1114  */
1115
/*
 * ip6_route_add - build a route from @cfg and insert it with __ip6_ins_rt().
 *
 * Returns -EINVAL straight away when either prefix length exceeds 128, or
 * when a source prefix is given without CONFIG_IPV6_SUBTREES.  After that
 * point every failure goes through the out: label, which drops the device
 * and inet6_dev references taken so far and frees the half-built route.
 * On success the return value is whatever __ip6_ins_rt() returns.
 *
 * Note that @cfg is modified: a zero fc_metric becomes IP6_RT_PRIO_USER and
 * an RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT.
 */
1116 int ip6_route_add(struct fib6_config *cfg)
1117 {
1118         int err;
1119         struct rt6_info *rt = NULL;
1120         struct net_device *dev = NULL;
1121         struct inet6_dev *idev = NULL;
1122         struct fib6_table *table;
1123         int addr_type;
1124
1125         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1126                 return -EINVAL;
1127 #ifndef CONFIG_IPV6_SUBTREES
1128         if (cfg->fc_src_len)
1129                 return -EINVAL;
1130 #endif
             /* An explicit interface index pins both dev and idev (each holds a reference). */
1131         if (cfg->fc_ifindex) {
1132                 err = -ENODEV;
1133                 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1134                 if (!dev)
1135                         goto out;
1136                 idev = in6_dev_get(dev);
1137                 if (!idev)
1138                         goto out;
1139         }
1140
1141         if (cfg->fc_metric == 0)
1142                 cfg->fc_metric = IP6_RT_PRIO_USER;
1143
1144         table = fib6_new_table(cfg->fc_table);
1145         if (table == NULL) {
1146                 err = -ENOBUFS;
1147                 goto out;
1148         }
1149
1150         rt = ip6_dst_alloc();
1151
1152         if (rt == NULL) {
1153                 err = -ENOMEM;
1154                 goto out;
1155         }
1156
1157         rt->u.dst.obsolete = -1;
1158         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1159
1160         if (cfg->fc_protocol == RTPROT_UNSPEC)
1161                 cfg->fc_protocol = RTPROT_BOOT;
1162         rt->rt6i_protocol = cfg->fc_protocol;
1163
1164         addr_type = ipv6_addr_type(&cfg->fc_dst);
1165
             /* Default handlers; the reject branch below replaces both. */
1166         if (addr_type & IPV6_ADDR_MULTICAST)
1167                 rt->u.dst.input = ip6_mc_input;
1168         else
1169                 rt->u.dst.input = ip6_forward;
1170
1171         rt->u.dst.output = ip6_output;
1172
1173         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1174         rt->rt6i_dst.plen = cfg->fc_dst_len;
1175         if (rt->rt6i_dst.plen == 128)
1176                rt->u.dst.flags = DST_HOST;
1177
1178 #ifdef CONFIG_IPV6_SUBTREES
1179         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1180         rt->rt6i_src.plen = cfg->fc_src_len;
1181 #endif
1182
1183         rt->rt6i_metric = cfg->fc_metric;
1184
1185         /* We cannot add true routes via loopback here,
1186            they would result in kernel looping; promote them to reject routes
1187          */
1188         if ((cfg->fc_flags & RTF_REJECT) ||
1189             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1190                 /* hold loopback dev/idev if we haven't done so. */
1191                 if (dev != &loopback_dev) {
1192                         if (dev) {
1193                                 dev_put(dev);
1194                                 in6_dev_put(idev);
1195                         }
1196                         dev = &loopback_dev;
1197                         dev_hold(dev);
1198                         idev = in6_dev_get(dev);
1199                         if (!idev) {
1200                                 err = -ENODEV;
1201                                 goto out;
1202                         }
1203                 }
1204                 rt->u.dst.output = ip6_pkt_discard_out;
1205                 rt->u.dst.input = ip6_pkt_discard;
1206                 rt->u.dst.error = -ENETUNREACH;
                     /* The caller's other fc_flags are not copied for reject routes. */
1207                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1208                 goto install_route;
1209         }
1210
1211         if (cfg->fc_flags & RTF_GATEWAY) {
1212                 struct in6_addr *gw_addr;
1213                 int gwa_type;
1214
1215                 gw_addr = &cfg->fc_gateway;
1216                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1217                 gwa_type = ipv6_addr_type(gw_addr);
1218
1219                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1220                         struct rt6_info *grt;
1221
1222                         /* IPv6 strictly inhibits using not link-local
1223                            addresses as nexthop address.
1224                            Otherwise, router will not able to send redirects.
1225                            It is very good, but in some (rare!) circumstances
1226                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1227                            some exceptions. --ANK
1228                          */
1229                         err = -EINVAL;
1230                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1231                                 goto out;
1232
1233                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1234
1235                         err = -EHOSTUNREACH;
1236                         if (grt == NULL)
1237                                 goto out;
1238                         if (dev) {
1239                                 if (dev != grt->rt6i_dev) {
1240                                         dst_release(&grt->u.dst);
1241                                         goto out;
1242                                 }
1243                         } else {
                                     /* No interface given: adopt the one the gateway is reached through. */
1244                                 dev = grt->rt6i_dev;
1245                                 idev = grt->rt6i_idev;
1246                                 dev_hold(dev);
1247                                 in6_dev_hold(grt->rt6i_idev);
1248                         }
                             /* err stays -EHOSTUNREACH when the route to the gateway is itself a gateway route. */
1249                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1250                                 err = 0;
1251                         dst_release(&grt->u.dst);
1252
1253                         if (err)
1254                                 goto out;
1255                 }
1256                 err = -EINVAL;
1257                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1258                         goto out;
1259         }
1260
1261         err = -ENODEV;
1262         if (dev == NULL)
1263                 goto out;
1264
1265         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1266                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1267                 if (IS_ERR(rt->rt6i_nexthop)) {
1268                         err = PTR_ERR(rt->rt6i_nexthop);
                             /* Clear the error pointer so it is never left in the route being freed at out:. */
1269                         rt->rt6i_nexthop = NULL;
1270                         goto out;
1271                 }
1272         }
1273
1274         rt->rt6i_flags = cfg->fc_flags;
1275
1276 install_route:
             /*
              * Apply caller-supplied metrics: attribute type N is stored in
              * metrics[N - 1]; type 0 is ignored and types above RTAX_MAX are
              * rejected with -EINVAL.
              */
1277         if (cfg->fc_mx) {
1278                 struct nlattr *nla;
1279                 int remaining;
1280
1281                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1282                         int type = nla->nla_type;
1283
1284                         if (type) {
1285                                 if (type > RTAX_MAX) {
1286                                         err = -EINVAL;
1287                                         goto out;
1288                                 }
1289
1290                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1291                         }
1292                 }
1293         }
1294
             /* Fill in defaults for the metrics that are still zero. */
1295         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1296                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1297         if (!rt->u.dst.metrics[RTAX_MTU-1])
1298                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1299         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1300                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
             /* The dev/idev references are not dropped on this path; the pointers are stored in the route. */
1301         rt->u.dst.dev = dev;
1302         rt->rt6i_idev = idev;
1303         rt->rt6i_table = table;
1304         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1305
1306 out:
             /* Error exit: release whatever was acquired before the failure. */
1307         if (dev)
1308                 dev_put(dev);
1309         if (idev)
1310                 in6_dev_put(idev);
1311         if (rt)
1312                 dst_free(&rt->u.dst);
1313         return err;
1314 }
1315
1316 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1317 {
1318         int err;
1319         struct fib6_table *table;
1320
1321         if (rt == &ip6_null_entry)
1322                 return -ENOENT;
1323
1324         table = rt->rt6i_table;
1325         write_lock_bh(&table->tb6_lock);
1326
1327         err = fib6_del(rt, info);
1328         dst_release(&rt->u.dst);
1329
1330         write_unlock_bh(&table->tb6_lock);
1331
1332         return err;
1333 }
1334
1335 int ip6_del_rt(struct rt6_info *rt)
1336 {
1337         return __ip6_del_rt(rt, NULL);
1338 }
1339
1340 static int ip6_route_del(struct fib6_config *cfg)
1341 {
1342         struct fib6_table *table;
1343         struct fib6_node *fn;
1344         struct rt6_info *rt;
1345         int err = -ESRCH;
1346
1347         table = fib6_get_table(cfg->fc_table);
1348         if (table == NULL)
1349                 return err;
1350
1351         read_lock_bh(&table->tb6_lock);
1352
1353         fn = fib6_locate(&table->tb6_root,
1354                          &cfg->fc_dst, cfg->fc_dst_len,
1355                          &cfg->fc_src, cfg->fc_src_len);
1356
1357         if (fn) {
1358                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1359                         if (cfg->fc_ifindex &&
1360                             (rt->rt6i_dev == NULL ||
1361                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1362                                 continue;
1363                         if (cfg->fc_flags & RTF_GATEWAY &&
1364                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1365                                 continue;
1366                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1367                                 continue;
1368                         dst_hold(&rt->u.dst);
1369                         read_unlock_bh(&table->tb6_lock);
1370
1371                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1372                 }
1373         }
1374         read_unlock_bh(&table->tb6_lock);
1375
1376         return err;
1377 }
1378
1379 /*
1380  *      Handle redirects
1381  */
/*
 * Lookup key used while processing a redirect: an ordinary flow plus the
 * address of the router that sent the redirect.
 *
 * "fl" has to remain the first member: __ip6_route_redirect() receives a
 * struct flowi * and casts it back to struct ip6rd_flowi * to reach
 * "gateway".
 */
1382 struct ip6rd_flowi {
1383         struct flowi fl;
1384         struct in6_addr gateway;
1385 };
1386
1387 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1388                                              struct flowi *fl,
1389                                              int flags)
1390 {
1391         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1392         struct rt6_info *rt;
1393         struct fib6_node *fn;
1394
1395         /*
1396          * Get the "current" route for this destination and
1397          * check if the redirect has come from approriate router.
1398          *
1399          * RFC 2461 specifies that redirects should only be
1400          * accepted if they come from the nexthop to the target.
1401          * Due to the way the routes are chosen, this notion
1402          * is a bit fuzzy and one might need to check all possible
1403          * routes.
1404          */
1405
1406         read_lock_bh(&table->tb6_lock);
1407         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1408 restart:
1409         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1410                 /*
1411                  * Current route is on-link; redirect is always invalid.
1412                  *
1413                  * Seems, previous statement is not true. It could
1414                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1415                  * But then router serving it might decide, that we should
1416                  * know truth 8)8) --ANK (980726).
1417                  */
1418                 if (rt6_check_expired(rt))
1419                         continue;
1420                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1421                         continue;
1422                 if (fl->oif != rt->rt6i_dev->ifindex)
1423                         continue;
1424                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1425                         continue;
1426                 break;
1427         }
1428
1429         if (!rt)
1430                 rt = &ip6_null_entry;
1431         BACKTRACK(&fl->fl6_src);
1432 out:
1433         dst_hold(&rt->u.dst);
1434
1435         read_unlock_bh(&table->tb6_lock);
1436
1437         return rt;
1438 };
1439
1440 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1441                                            struct in6_addr *src,
1442                                            struct in6_addr *gateway,
1443                                            struct net_device *dev)
1444 {
1445         int flags = RT6_LOOKUP_F_HAS_SADDR;
1446         struct ip6rd_flowi rdfl = {
1447                 .fl = {
1448                         .oif = dev->ifindex,
1449                         .nl_u = {
1450                                 .ip6_u = {
1451                                         .daddr = *dest,
1452                                         .saddr = *src,
1453                                 },
1454                         },
1455                 },
1456                 .gateway = *gateway,
1457         };
1458
1459         if (rt6_need_strict(dest))
1460                 flags |= RT6_LOOKUP_F_IFACE;
1461
1462         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1463 }
1464
1465 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1466                   struct in6_addr *saddr,
1467                   struct neighbour *neigh, u8 *lladdr, int on_link)
1468 {
1469         struct rt6_info *rt, *nrt = NULL;
1470         struct netevent_redirect netevent;
1471
1472         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1473
1474         if (rt == &ip6_null_entry) {
1475                 if (net_ratelimit())
1476                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1477                                "for redirect target\n");
1478                 goto out;
1479         }
1480
1481         /*
1482          *      We have finally decided to accept it.
1483          */
1484
1485         neigh_update(neigh, lladdr, NUD_STALE,
1486                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1487                      NEIGH_UPDATE_F_OVERRIDE|
1488                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1489                                      NEIGH_UPDATE_F_ISROUTER))
1490                      );
1491
1492         /*
1493          * Redirect received -> path was valid.
1494          * Look, redirects are sent only in response to data packets,
1495          * so that this nexthop apparently is reachable. --ANK
1496          */
1497         dst_confirm(&rt->u.dst);
1498
1499         /* Duplicate redirect: silently ignore. */
1500         if (neigh == rt->u.dst.neighbour)
1501                 goto out;
1502
1503         nrt = ip6_rt_copy(rt);
1504         if (nrt == NULL)
1505                 goto out;
1506
1507         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1508         if (on_link)
1509                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1510
1511         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1512         nrt->rt6i_dst.plen = 128;
1513         nrt->u.dst.flags |= DST_HOST;
1514
1515         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1516         nrt->rt6i_nexthop = neigh_clone(neigh);
1517         /* Reset pmtu, it may be better */
1518         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1519         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1520
1521         if (ip6_ins_rt(nrt))
1522                 goto out;
1523
1524         netevent.old = &rt->u.dst;
1525         netevent.new = &nrt->u.dst;
1526         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1527
1528         if (rt->rt6i_flags&RTF_CACHE) {
1529                 ip6_del_rt(rt);
1530                 return;
1531         }
1532
1533 out:
1534         dst_release(&rt->u.dst);
1535         return;
1536 }
1537
1538 /*
1539  *      Handle ICMP "packet too big" messages
1540  *      i.e. Path MTU discovery
1541  */
1542
1543 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1544                         struct net_device *dev, u32 pmtu)
1545 {
1546         struct rt6_info *rt, *nrt;
1547         int allfrag = 0;
1548
1549         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1550         if (rt == NULL)
1551                 return;
1552
1553         if (pmtu >= dst_mtu(&rt->u.dst))
1554                 goto out;
1555
1556         if (pmtu < IPV6_MIN_MTU) {
1557                 /*
1558                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1559                  * MTU (1280) and a fragment header should always be included
1560                  * after a node receiving Too Big message reporting PMTU is
1561                  * less than the IPv6 Minimum Link MTU.
1562                  */
1563                 pmtu = IPV6_MIN_MTU;
1564                 allfrag = 1;
1565         }
1566
1567         /* New mtu received -> path was valid.
1568            They are sent only in response to data packets,
1569            so that this nexthop apparently is reachable. --ANK
1570          */
1571         dst_confirm(&rt->u.dst);
1572
1573         /* Host route. If it is static, it would be better
1574            not to override it, but add new one, so that
1575            when cache entry will expire old pmtu
1576            would return automatically.
1577          */
1578         if (rt->rt6i_flags & RTF_CACHE) {
1579                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1580                 if (allfrag)
1581                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1582                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1583                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1584                 goto out;
1585         }
1586
1587         /* Network route.
1588            Two cases are possible:
1589            1. It is connected route. Action: COW
1590            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1591          */
1592         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1593                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1594         else
1595                 nrt = rt6_alloc_clone(rt, daddr);
1596
1597         if (nrt) {
1598                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1599                 if (allfrag)
1600                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1601
1602                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1603                  * happened within 5 mins, the recommended timer is 10 mins.
1604                  * Here this route expiration time is set to ip6_rt_mtu_expires
1605                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1606                  * and detecting PMTU increase will be automatically happened.
1607                  */
1608                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1609                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1610
1611                 ip6_ins_rt(nrt);
1612         }
1613 out:
1614         dst_release(&rt->u.dst);
1615 }
1616
1617 /*
1618  *      Misc support functions
1619  */
1620
1621 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1622 {
1623         struct rt6_info *rt = ip6_dst_alloc();
1624
1625         if (rt) {
1626                 rt->u.dst.input = ort->u.dst.input;
1627                 rt->u.dst.output = ort->u.dst.output;
1628
1629                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1630                 rt->u.dst.error = ort->u.dst.error;
1631                 rt->u.dst.dev = ort->u.dst.dev;
1632                 if (rt->u.dst.dev)
1633                         dev_hold(rt->u.dst.dev);
1634                 rt->rt6i_idev = ort->rt6i_idev;
1635                 if (rt->rt6i_idev)
1636                         in6_dev_hold(rt->rt6i_idev);
1637                 rt->u.dst.lastuse = jiffies;
1638                 rt->rt6i_expires = 0;
1639
1640                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1641                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1642                 rt->rt6i_metric = 0;
1643
1644                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1645 #ifdef CONFIG_IPV6_SUBTREES
1646                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1647 #endif
1648                 rt->rt6i_table = ort->rt6i_table;
1649         }
1650         return rt;
1651 }
1652
1653 #ifdef CONFIG_IPV6_ROUTE_INFO
1654 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1655                                            struct in6_addr *gwaddr, int ifindex)
1656 {
1657         struct fib6_node *fn;
1658         struct rt6_info *rt = NULL;
1659         struct fib6_table *table;
1660
1661         table = fib6_get_table(RT6_TABLE_INFO);
1662         if (table == NULL)
1663                 return NULL;
1664
1665         write_lock_bh(&table->tb6_lock);
1666         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1667         if (!fn)
1668                 goto out;
1669
1670         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1671                 if (rt->rt6i_dev->ifindex != ifindex)
1672                         continue;
1673                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1674                         continue;
1675                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1676                         continue;
1677                 dst_hold(&rt->u.dst);
1678                 break;
1679         }
1680 out:
1681         write_unlock_bh(&table->tb6_lock);
1682         return rt;
1683 }
1684
1685 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1686                                            struct in6_addr *gwaddr, int ifindex,
1687                                            unsigned pref)
1688 {
1689         struct fib6_config cfg = {
1690                 .fc_table       = RT6_TABLE_INFO,
1691                 .fc_metric      = 1024,
1692                 .fc_ifindex     = ifindex,
1693                 .fc_dst_len     = prefixlen,
1694                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1695                                   RTF_UP | RTF_PREF(pref),
1696         };
1697
1698         ipv6_addr_copy(&cfg.fc_dst, prefix);
1699         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1700
1701         /* We should treat it as a default route if prefix length is 0. */
1702         if (!prefixlen)
1703                 cfg.fc_flags |= RTF_DEFAULT;
1704
1705         ip6_route_add(&cfg);
1706
1707         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1708 }
1709 #endif
1710
1711 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1712 {
1713         struct rt6_info *rt;
1714         struct fib6_table *table;
1715
1716         table = fib6_get_table(RT6_TABLE_DFLT);
1717         if (table == NULL)
1718                 return NULL;
1719
1720         write_lock_bh(&table->tb6_lock);
1721         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1722                 if (dev == rt->rt6i_dev &&
1723                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1724                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1725                         break;
1726         }
1727         if (rt)
1728                 dst_hold(&rt->u.dst);
1729         write_unlock_bh(&table->tb6_lock);
1730         return rt;
1731 }
1732
1733 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1734                                      struct net_device *dev,
1735                                      unsigned int pref)
1736 {
1737         struct fib6_config cfg = {
1738                 .fc_table       = RT6_TABLE_DFLT,
1739                 .fc_metric      = 1024,
1740                 .fc_ifindex     = dev->ifindex,
1741                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1742                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1743         };
1744
1745         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1746
1747         ip6_route_add(&cfg);
1748
1749         return rt6_get_dflt_router(gwaddr, dev);
1750 }
1751
1752 void rt6_purge_dflt_routers(void)
1753 {
1754         struct rt6_info *rt;
1755         struct fib6_table *table;
1756
1757         /* NOTE: Keep consistent with rt6_get_dflt_router */
1758         table = fib6_get_table(RT6_TABLE_DFLT);
1759         if (table == NULL)
1760                 return;
1761
1762 restart:
1763         read_lock_bh(&table->tb6_lock);
1764         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1765                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1766                         dst_hold(&rt->u.dst);
1767                         read_unlock_bh(&table->tb6_lock);
1768                         ip6_del_rt(rt);
1769                         goto restart;
1770                 }
1771         }
1772         read_unlock_bh(&table->tb6_lock);
1773 }
1774
1775 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1776                                  struct fib6_config *cfg)
1777 {
1778         memset(cfg, 0, sizeof(*cfg));
1779
1780         cfg->fc_table = RT6_TABLE_MAIN;
1781         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1782         cfg->fc_metric = rtmsg->rtmsg_metric;
1783         cfg->fc_expires = rtmsg->rtmsg_info;
1784         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1785         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1786         cfg->fc_flags = rtmsg->rtmsg_flags;
1787
1788         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1789         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1790         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1791 }
1792
1793 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1794 {
1795         struct fib6_config cfg;
1796         struct in6_rtmsg rtmsg;
1797         int err;
1798
1799         switch(cmd) {
1800         case SIOCADDRT:         /* Add a route */
1801         case SIOCDELRT:         /* Delete a route */
1802                 if (!capable(CAP_NET_ADMIN))
1803                         return -EPERM;
1804                 err = copy_from_user(&rtmsg, arg,
1805                                      sizeof(struct in6_rtmsg));
1806                 if (err)
1807                         return -EFAULT;
1808
1809                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1810
1811                 rtnl_lock();
1812                 switch (cmd) {
1813                 case SIOCADDRT:
1814                         err = ip6_route_add(&cfg);
1815                         break;
1816                 case SIOCDELRT:
1817                         err = ip6_route_del(&cfg);
1818                         break;
1819                 default:
1820                         err = -EINVAL;
1821                 }
1822                 rtnl_unlock();
1823
1824                 return err;
1825         }
1826
1827         return -EINVAL;
1828 }
1829
1830 /*
1831  *      Drop the packet on the floor
1832  */
1833
1834 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1835                                int ipstats_mib_noroutes)
1836 {
1837         int type;
1838         switch (ipstats_mib_noroutes) {
1839         case IPSTATS_MIB_INNOROUTES:
1840                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1841                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1842                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1843                         break;
1844                 }
1845                 /* FALLTHROUGH */
1846         case IPSTATS_MIB_OUTNOROUTES:
1847                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1848                 break;
1849         }
1850         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1851         kfree_skb(skb);
1852         return 0;
1853 }
1854
1855 static int ip6_pkt_discard(struct sk_buff *skb)
1856 {
1857         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1858 }
1859
1860 static int ip6_pkt_discard_out(struct sk_buff *skb)
1861 {
1862         skb->dev = skb->dst->dev;
1863         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1864 }
1865
1866 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1867
1868 static int ip6_pkt_prohibit(struct sk_buff *skb)
1869 {
1870         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1871 }
1872
1873 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1874 {
1875         skb->dev = skb->dst->dev;
1876         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1877 }
1878
/*
 *	Blackhole: free the packet silently.  Unlike ip6_pkt_drop() no
 *	counter is touched and no ICMP error is sent.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1884
1885 #endif
1886
1887 /*
1888  *      Allocate a dst for local (unicast / anycast) address.
1889  */
1890
1891 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1892                                     const struct in6_addr *addr,
1893                                     int anycast)
1894 {
1895         struct rt6_info *rt = ip6_dst_alloc();
1896
1897         if (rt == NULL)
1898                 return ERR_PTR(-ENOMEM);
1899
1900         dev_hold(&loopback_dev);
1901         in6_dev_hold(idev);
1902
1903         rt->u.dst.flags = DST_HOST;
1904         rt->u.dst.input = ip6_input;
1905         rt->u.dst.output = ip6_output;
1906         rt->rt6i_dev = &loopback_dev;
1907         rt->rt6i_idev = idev;
1908         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1909         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1910         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1911         rt->u.dst.obsolete = -1;
1912
1913         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1914         if (anycast)
1915                 rt->rt6i_flags |= RTF_ANYCAST;
1916         else
1917                 rt->rt6i_flags |= RTF_LOCAL;
1918         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1919         if (rt->rt6i_nexthop == NULL) {
1920                 dst_free(&rt->u.dst);
1921                 return ERR_PTR(-ENOMEM);
1922         }
1923
1924         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1925         rt->rt6i_dst.plen = 128;
1926         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1927
1928         atomic_set(&rt->u.dst.__refcnt, 1);
1929
1930         return rt;
1931 }
1932
1933 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1934 {
1935         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1936             rt != &ip6_null_entry) {
1937                 RT6_TRACE("deleted by ifdown %p\n", rt);
1938                 return -1;
1939         }
1940         return 0;
1941 }
1942
1943 void rt6_ifdown(struct net_device *dev)
1944 {
1945         fib6_clean_all(fib6_ifdown, 0, dev);
1946 }
1947
/* Argument bundle passed from rt6_mtu_change() to its per-route callback. */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
1953
1954 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1955 {
1956         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1957         struct inet6_dev *idev;
1958
1959         /* In IPv6 pmtu discovery is not optional,
1960            so that RTAX_MTU lock cannot disable it.
1961            We still use this lock to block changes
1962            caused by addrconf/ndisc.
1963         */
1964
1965         idev = __in6_dev_get(arg->dev);
1966         if (idev == NULL)
1967                 return 0;
1968
1969         /* For administrative MTU increase, there is no way to discover
1970            IPv6 PMTU increase, so PMTU increase should be updated here.
1971            Since RFC 1981 doesn't include administrative MTU increase
1972            update PMTU increase is a MUST. (i.e. jumbo frame)
1973          */
1974         /*
1975            If new MTU is less than route PMTU, this new MTU will be the
1976            lowest MTU in the path, update the route PMTU to reflect PMTU
1977            decreases; if new MTU is greater than route PMTU, and the
1978            old MTU is the lowest MTU in the path, update the route PMTU
1979            to reflect the increase. In this case if the other nodes' MTU
1980            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1981            PMTU discouvery.
1982          */
1983         if (rt->rt6i_dev == arg->dev &&
1984             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1985             (dst_mtu(&rt->u.dst) > arg->mtu ||
1986              (dst_mtu(&rt->u.dst) < arg->mtu &&
1987               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1988                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1989                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1990         }
1991         return 0;
1992 }
1993
1994 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1995 {
1996         struct rt6_mtu_change_arg arg = {
1997                 .dev = dev,
1998                 .mtu = mtu,
1999         };
2000
2001         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
2002 }
2003
2004 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2005         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2006         [RTA_OIF]               = { .type = NLA_U32 },
2007         [RTA_IIF]               = { .type = NLA_U32 },
2008         [RTA_PRIORITY]          = { .type = NLA_U32 },
2009         [RTA_METRICS]           = { .type = NLA_NESTED },
2010 };
2011
2012 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2013                               struct fib6_config *cfg)
2014 {
2015         struct rtmsg *rtm;
2016         struct nlattr *tb[RTA_MAX+1];
2017         int err;
2018
2019         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2020         if (err < 0)
2021                 goto errout;
2022
2023         err = -EINVAL;
2024         rtm = nlmsg_data(nlh);
2025         memset(cfg, 0, sizeof(*cfg));
2026
2027         cfg->fc_table = rtm->rtm_table;
2028         cfg->fc_dst_len = rtm->rtm_dst_len;
2029         cfg->fc_src_len = rtm->rtm_src_len;
2030         cfg->fc_flags = RTF_UP;
2031         cfg->fc_protocol = rtm->rtm_protocol;
2032
2033         if (rtm->rtm_type == RTN_UNREACHABLE)
2034                 cfg->fc_flags |= RTF_REJECT;
2035
2036         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2037         cfg->fc_nlinfo.nlh = nlh;
2038
2039         if (tb[RTA_GATEWAY]) {
2040                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2041                 cfg->fc_flags |= RTF_GATEWAY;
2042         }
2043
2044         if (tb[RTA_DST]) {
2045                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2046
2047                 if (nla_len(tb[RTA_DST]) < plen)
2048                         goto errout;
2049
2050                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2051         }
2052
2053         if (tb[RTA_SRC]) {
2054                 int plen = (rtm->rtm_src_len + 7) >> 3;
2055
2056                 if (nla_len(tb[RTA_SRC]) < plen)
2057                         goto errout;
2058
2059                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2060         }
2061
2062         if (tb[RTA_OIF])
2063                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2064
2065         if (tb[RTA_PRIORITY])
2066                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2067
2068         if (tb[RTA_METRICS]) {
2069                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2070                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2071         }
2072
2073         if (tb[RTA_TABLE])
2074                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2075
2076         err = 0;
2077 errout:
2078         return err;
2079 }
2080
2081 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2082 {
2083         struct fib6_config cfg;
2084         int err;
2085
2086         err = rtm_to_fib6_config(skb, nlh, &cfg);
2087         if (err < 0)
2088                 return err;
2089
2090         return ip6_route_del(&cfg);
2091 }
2092
2093 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2094 {
2095         struct fib6_config cfg;
2096         int err;
2097
2098         err = rtm_to_fib6_config(skb, nlh, &cfg);
2099         if (err < 0)
2100                 return err;
2101
2102         return ip6_route_add(&cfg);
2103 }
2104
2105 static inline size_t rt6_nlmsg_size(void)
2106 {
2107         return NLMSG_ALIGN(sizeof(struct rtmsg))
2108                + nla_total_size(16) /* RTA_SRC */
2109                + nla_total_size(16) /* RTA_DST */
2110                + nla_total_size(16) /* RTA_GATEWAY */
2111                + nla_total_size(16) /* RTA_PREFSRC */
2112                + nla_total_size(4) /* RTA_TABLE */
2113                + nla_total_size(4) /* RTA_IIF */
2114                + nla_total_size(4) /* RTA_OIF */
2115                + nla_total_size(4) /* RTA_PRIORITY */
2116                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2117                + nla_total_size(sizeof(struct rta_cacheinfo));
2118 }
2119
2120 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2121                          struct in6_addr *dst, struct in6_addr *src,
2122                          int iif, int type, u32 pid, u32 seq,
2123                          int prefix, unsigned int flags)
2124 {
2125         struct rtmsg *rtm;
2126         struct nlmsghdr *nlh;
2127         long expires;
2128         u32 table;
2129
2130         if (prefix) {   /* user wants prefix routes only */
2131                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2132                         /* success since this is not a prefix route */
2133                         return 1;
2134                 }
2135         }
2136
2137         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2138         if (nlh == NULL)
2139                 return -EMSGSIZE;
2140
2141         rtm = nlmsg_data(nlh);
2142         rtm->rtm_family = AF_INET6;
2143         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2144         rtm->rtm_src_len = rt->rt6i_src.plen;
2145         rtm->rtm_tos = 0;
2146         if (rt->rt6i_table)
2147                 table = rt->rt6i_table->tb6_id;
2148         else
2149                 table = RT6_TABLE_UNSPEC;
2150         rtm->rtm_table = table;
2151         NLA_PUT_U32(skb, RTA_TABLE, table);
2152         if (rt->rt6i_flags&RTF_REJECT)
2153                 rtm->rtm_type = RTN_UNREACHABLE;
2154         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2155                 rtm->rtm_type = RTN_LOCAL;
2156         else
2157                 rtm->rtm_type = RTN_UNICAST;
2158         rtm->rtm_flags = 0;
2159         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2160         rtm->rtm_protocol = rt->rt6i_protocol;
2161         if (rt->rt6i_flags&RTF_DYNAMIC)
2162                 rtm->rtm_protocol = RTPROT_REDIRECT;
2163         else if (rt->rt6i_flags & RTF_ADDRCONF)
2164                 rtm->rtm_protocol = RTPROT_KERNEL;
2165         else if (rt->rt6i_flags&RTF_DEFAULT)
2166                 rtm->rtm_protocol = RTPROT_RA;
2167
2168         if (rt->rt6i_flags&RTF_CACHE)
2169                 rtm->rtm_flags |= RTM_F_CLONED;
2170
2171         if (dst) {
2172                 NLA_PUT(skb, RTA_DST, 16, dst);
2173                 rtm->rtm_dst_len = 128;
2174         } else if (rtm->rtm_dst_len)
2175                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2176 #ifdef CONFIG_IPV6_SUBTREES
2177         if (src) {
2178                 NLA_PUT(skb, RTA_SRC, 16, src);
2179                 rtm->rtm_src_len = 128;
2180         } else if (rtm->rtm_src_len)
2181                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2182 #endif
2183         if (iif)
2184                 NLA_PUT_U32(skb, RTA_IIF, iif);
2185         else if (dst) {
2186                 struct in6_addr saddr_buf;
2187                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2188                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2189         }
2190
2191         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2192                 goto nla_put_failure;
2193
2194         if (rt->u.dst.neighbour)
2195                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2196
2197         if (rt->u.dst.dev)
2198                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2199
2200         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2201
2202         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2203         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2204                                expires, rt->u.dst.error) < 0)
2205                 goto nla_put_failure;
2206
2207         return nlmsg_end(skb, nlh);
2208
2209 nla_put_failure:
2210         nlmsg_cancel(skb, nlh);
2211         return -EMSGSIZE;
2212 }
2213
2214 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2215 {
2216         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2217         int prefix;
2218
2219         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2220                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2221                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2222         } else
2223                 prefix = 0;
2224
2225         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2226                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2227                      prefix, NLM_F_MULTI);
2228 }
2229
2230 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2231 {
2232         struct nlattr *tb[RTA_MAX+1];
2233         struct rt6_info *rt;
2234         struct sk_buff *skb;
2235         struct rtmsg *rtm;
2236         struct flowi fl;
2237         int err, iif = 0;
2238
2239         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2240         if (err < 0)
2241                 goto errout;
2242
2243         err = -EINVAL;
2244         memset(&fl, 0, sizeof(fl));
2245
2246         if (tb[RTA_SRC]) {
2247                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2248                         goto errout;
2249
2250                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2251         }
2252
2253         if (tb[RTA_DST]) {
2254                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2255                         goto errout;
2256
2257                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2258         }
2259
2260         if (tb[RTA_IIF])
2261                 iif = nla_get_u32(tb[RTA_IIF]);
2262
2263         if (tb[RTA_OIF])
2264                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2265
2266         if (iif) {
2267                 struct net_device *dev;
2268                 dev = __dev_get_by_index(&init_net, iif);
2269                 if (!dev) {
2270                         err = -ENODEV;
2271                         goto errout;
2272                 }
2273         }
2274
2275         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2276         if (skb == NULL) {
2277                 err = -ENOBUFS;
2278                 goto errout;
2279         }
2280
2281         /* Reserve room for dummy headers, this skb can pass
2282            through good chunk of routing engine.
2283          */
2284         skb_reset_mac_header(skb);
2285         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2286
2287         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2288         skb->dst = &rt->u.dst;
2289
2290         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2291                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2292                             nlh->nlmsg_seq, 0, 0);
2293         if (err < 0) {
2294                 kfree_skb(skb);
2295                 goto errout;
2296         }
2297
2298         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2299 errout:
2300         return err;
2301 }
2302
2303 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2304 {
2305         struct sk_buff *skb;
2306         u32 pid = 0, seq = 0;
2307         struct nlmsghdr *nlh = NULL;
2308         int err = -ENOBUFS;
2309
2310         if (info) {
2311                 pid = info->pid;
2312                 nlh = info->nlh;
2313                 if (nlh)
2314                         seq = nlh->nlmsg_seq;
2315         }
2316
2317         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2318         if (skb == NULL)
2319                 goto errout;
2320
2321         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2322         if (err < 0) {
2323                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2324                 WARN_ON(err == -EMSGSIZE);
2325                 kfree_skb(skb);
2326                 goto errout;
2327         }
2328         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2329 errout:
2330         if (err < 0)
2331                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2332 }
2333
2334 /*
2335  *      /proc
2336  */
2337
2338 #ifdef CONFIG_PROC_FS
2339
/*
 *	Nominal length (150 characters) of one line emitted by
 *	rt6_info_route(); used to turn a read offset into a number of
 *	whole lines to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach @offset */
	int len;	/* bytes written to @buffer so far */
};
2350
2351 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2352 {
2353         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2354
2355         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2356                 arg->skip++;
2357                 return 0;
2358         }
2359
2360         if (arg->len >= arg->length)
2361                 return 0;
2362
2363         arg->len += sprintf(arg->buffer + arg->len,
2364                             NIP6_SEQFMT " %02x ",
2365                             NIP6(rt->rt6i_dst.addr),
2366                             rt->rt6i_dst.plen);
2367
2368 #ifdef CONFIG_IPV6_SUBTREES
2369         arg->len += sprintf(arg->buffer + arg->len,
2370                             NIP6_SEQFMT " %02x ",
2371                             NIP6(rt->rt6i_src.addr),
2372                             rt->rt6i_src.plen);
2373 #else
2374         arg->len += sprintf(arg->buffer + arg->len,
2375                             "00000000000000000000000000000000 00 ");
2376 #endif
2377
2378         if (rt->rt6i_nexthop) {
2379                 arg->len += sprintf(arg->buffer + arg->len,
2380                                     NIP6_SEQFMT,
2381                                     NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2382         } else {
2383                 arg->len += sprintf(arg->buffer + arg->len,
2384                                     "00000000000000000000000000000000");
2385         }
2386         arg->len += sprintf(arg->buffer + arg->len,
2387                             " %08x %08x %08x %08x %8s\n",
2388                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2389                             rt->u.dst.__use, rt->rt6i_flags,
2390                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2391         return 0;
2392 }
2393
2394 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2395 {
2396         struct rt6_proc_arg arg = {
2397                 .buffer = buffer,
2398                 .offset = offset,
2399                 .length = length,
2400         };
2401
2402         fib6_clean_all(rt6_info_route, 0, &arg);
2403
2404         *start = buffer;
2405         if (offset)
2406                 *start += offset % RT6_INFO_LEN;
2407
2408         arg.len -= offset % RT6_INFO_LEN;
2409
2410         if (arg.len > length)
2411                 arg.len = length;
2412         if (arg.len < 0)
2413                 arg.len = 0;
2414
2415         return arg.len;
2416 }
2417
2418 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2419 {
2420         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2421                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2422                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2423                       rt6_stats.fib_rt_cache,
2424                       atomic_read(&ip6_dst_ops.entries),
2425                       rt6_stats.fib_discarded_routes);
2426
2427         return 0;
2428 }
2429
2430 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2431 {
2432         return single_open(file, rt6_stats_seq_show, NULL);
2433 }
2434
2435 static const struct file_operations rt6_stats_seq_fops = {
2436         .owner   = THIS_MODULE,
2437         .open    = rt6_stats_seq_open,
2438         .read    = seq_read,
2439         .llseek  = seq_lseek,
2440         .release = single_release,
2441 };
2442 #endif  /* CONFIG_PROC_FS */
2443
2444 #ifdef CONFIG_SYSCTL
2445
2446 static int flush_delay;
2447
2448 static
2449 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2450                               void __user *buffer, size_t *lenp, loff_t *ppos)
2451 {
2452         if (write) {
2453                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2454                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2455                 return 0;
2456         } else
2457                 return -EINVAL;
2458 }
2459
2460 ctl_table ipv6_route_table[] = {
2461         {
2462                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH,
2463                 .procname       =       "flush",
2464                 .data           =       &flush_delay,
2465                 .maxlen         =       sizeof(int),
2466                 .mode           =       0200,
2467                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2468         },
2469         {
2470                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2471                 .procname       =       "gc_thresh",
2472                 .data           =       &ip6_dst_ops.gc_thresh,
2473                 .maxlen         =       sizeof(int),
2474                 .mode           =       0644,
2475                 .proc_handler   =       &proc_dointvec,
2476         },
2477         {
2478                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2479                 .procname       =       "max_size",
2480                 .data           =       &ip6_rt_max_size,
2481                 .maxlen         =       sizeof(int),
2482                 .mode           =       0644,
2483                 .proc_handler   =       &proc_dointvec,
2484         },
2485         {
2486                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2487                 .procname       =       "gc_min_interval",
2488                 .data           =       &ip6_rt_gc_min_interval,
2489                 .maxlen         =       sizeof(int),
2490                 .mode           =       0644,
2491                 .proc_handler   =       &proc_dointvec_jiffies,
2492                 .strategy       =       &sysctl_jiffies,
2493         },
2494         {
2495                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2496                 .procname       =       "gc_timeout",
2497                 .data           =       &ip6_rt_gc_timeout,
2498                 .maxlen         =       sizeof(int),
2499                 .mode           =       0644,
2500                 .proc_handler   =       &proc_dointvec_jiffies,
2501                 .strategy       =       &sysctl_jiffies,
2502         },
2503         {
2504                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2505                 .procname       =       "gc_interval",
2506                 .data           =       &ip6_rt_gc_interval,
2507                 .maxlen         =       sizeof(int),
2508                 .mode           =       0644,
2509                 .proc_handler   =       &proc_dointvec_jiffies,
2510                 .strategy       =       &sysctl_jiffies,
2511         },
2512         {
2513                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2514                 .procname       =       "gc_elasticity",
2515                 .data           =       &ip6_rt_gc_elasticity,
2516                 .maxlen         =       sizeof(int),
2517                 .mode           =       0644,
2518                 .proc_handler   =       &proc_dointvec_jiffies,
2519                 .strategy       =       &sysctl_jiffies,
2520         },
2521         {
2522                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2523                 .procname       =       "mtu_expires",
2524                 .data           =       &ip6_rt_mtu_expires,
2525                 .maxlen         =       sizeof(int),
2526                 .mode           =       0644,
2527                 .proc_handler   =       &proc_dointvec_jiffies,
2528                 .strategy       =       &sysctl_jiffies,
2529         },
2530         {
2531                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2532                 .procname       =       "min_adv_mss",
2533                 .data           =       &ip6_rt_min_advmss,
2534                 .maxlen         =       sizeof(int),
2535                 .mode           =       0644,
2536                 .proc_handler   =       &proc_dointvec_jiffies,
2537                 .strategy       =       &sysctl_jiffies,
2538         },
2539         {
2540                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2541                 .procname       =       "gc_min_interval_ms",
2542                 .data           =       &ip6_rt_gc_min_interval,
2543                 .maxlen         =       sizeof(int),
2544                 .mode           =       0644,
2545                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2546                 .strategy       =       &sysctl_ms_jiffies,
2547         },
2548         { .ctl_name = 0 }
2549 };
2550
2551 #endif
2552
2553 void __init ip6_route_init(void)
2554 {
2555 #ifdef  CONFIG_PROC_FS
2556         struct proc_dir_entry *p;
2557 #endif
2558         ip6_dst_ops.kmem_cachep =
2559                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2560                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2561         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2562
2563         fib6_init();
2564 #ifdef  CONFIG_PROC_FS
2565         p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
2566         if (p)
2567                 p->owner = THIS_MODULE;
2568
2569         proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2570 #endif
2571 #ifdef CONFIG_XFRM
2572         xfrm6_init();
2573 #endif
2574 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2575         fib6_rules_init();
2576 #endif
2577
2578         __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2579         __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2580         __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2581 }
2582
2583 void ip6_route_cleanup(void)
2584 {
2585 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2586         fib6_rules_cleanup();
2587 #endif
2588 #ifdef CONFIG_PROC_FS
2589         proc_net_remove(&init_net, "ipv6_route");
2590         proc_net_remove(&init_net, "rt6_stats");
2591 #endif
2592 #ifdef CONFIG_XFRM
2593         xfrm6_fini();
2594 #endif
2595         rt6_ifdown(NULL);
2596         fib6_gc_cleanup();
2597         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2598 }