]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
Merge branch 'master' of git://git.infradead.org/~dedekind/ubi-2.6
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
/* Tracing level: raise to 3 (or more) to compile in RDBG()/RT6_TRACE(). */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both macros expand to nothing useful. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: forward to printk(). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
77
78 #define CLONE_OFFLINK_ROUTE 0
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103                                            struct in6_addr *gwaddr, int ifindex,
104                                            unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106                                            struct in6_addr *gwaddr, int ifindex);
107 #endif
108
109 static struct dst_ops ip6_dst_ops = {
110         .family                 =       AF_INET6,
111         .protocol               =       __constant_htons(ETH_P_IPV6),
112         .gc                     =       ip6_dst_gc,
113         .gc_thresh              =       1024,
114         .check                  =       ip6_dst_check,
115         .destroy                =       ip6_dst_destroy,
116         .ifdown                 =       ip6_dst_ifdown,
117         .negative_advice        =       ip6_negative_advice,
118         .link_failure           =       ip6_link_failure,
119         .update_pmtu            =       ip6_rt_update_pmtu,
120         .entry_size             =       sizeof(struct rt6_info),
121 };
122
123 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124 {
125 }
126
127 static struct dst_ops ip6_dst_blackhole_ops = {
128         .family                 =       AF_INET6,
129         .protocol               =       __constant_htons(ETH_P_IPV6),
130         .destroy                =       ip6_dst_destroy,
131         .check                  =       ip6_dst_check,
132         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
133         .entry_size             =       sizeof(struct rt6_info),
134 };
135
136 struct rt6_info ip6_null_entry = {
137         .u = {
138                 .dst = {
139                         .__refcnt       = ATOMIC_INIT(1),
140                         .__use          = 1,
141                         .obsolete       = -1,
142                         .error          = -ENETUNREACH,
143                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
144                         .input          = ip6_pkt_discard,
145                         .output         = ip6_pkt_discard_out,
146                         .ops            = &ip6_dst_ops,
147                         .path           = (struct dst_entry*)&ip6_null_entry,
148                 }
149         },
150         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
151         .rt6i_metric    = ~(u32) 0,
152         .rt6i_ref       = ATOMIC_INIT(1),
153 };
154
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Handlers for the two extra reject entries below; defined later. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/* Reject entry reporting -EACCES. */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Reject entry reporting -EINVAL; one handler serves input and output. */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
200
201 /* allocate dst with ip6_dst_ops */
202 static __inline__ struct rt6_info *ip6_dst_alloc(void)
203 {
204         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
205 }
206
207 static void ip6_dst_destroy(struct dst_entry *dst)
208 {
209         struct rt6_info *rt = (struct rt6_info *)dst;
210         struct inet6_dev *idev = rt->rt6i_idev;
211
212         if (idev != NULL) {
213                 rt->rt6i_idev = NULL;
214                 in6_dev_put(idev);
215         }
216 }
217
218 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219                            int how)
220 {
221         struct rt6_info *rt = (struct rt6_info *)dst;
222         struct inet6_dev *idev = rt->rt6i_idev;
223
224         if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
225                 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
226                 if (loopback_idev != NULL) {
227                         rt->rt6i_idev = loopback_idev;
228                         in6_dev_put(idev);
229                 }
230         }
231 }
232
233 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234 {
235         return (rt->rt6i_flags & RTF_EXPIRES &&
236                 time_after(jiffies, rt->rt6i_expires));
237 }
238
239 static inline int rt6_need_strict(struct in6_addr *daddr)
240 {
241         return (ipv6_addr_type(daddr) &
242                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
243 }
244
245 /*
246  *      Route lookup. Any table->tb6_lock is implied.
247  */
248
249 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
250                                                     int oif,
251                                                     int strict)
252 {
253         struct rt6_info *local = NULL;
254         struct rt6_info *sprt;
255
256         if (oif) {
257                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
258                         struct net_device *dev = sprt->rt6i_dev;
259                         if (dev->ifindex == oif)
260                                 return sprt;
261                         if (dev->flags & IFF_LOOPBACK) {
262                                 if (sprt->rt6i_idev == NULL ||
263                                     sprt->rt6i_idev->dev->ifindex != oif) {
264                                         if (strict && oif)
265                                                 continue;
266                                         if (local && (!oif ||
267                                                       local->rt6i_idev->dev->ifindex == oif))
268                                                 continue;
269                                 }
270                                 local = sprt;
271                         }
272                 }
273
274                 if (local)
275                         return local;
276
277                 if (strict)
278                         return &ip6_null_entry;
279         }
280         return rt;
281 }
282
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state and
 * the last update is older than the interface's rtr_probe_interval.
 *
 * @rt may be NULL (nothing is done).  Nothing is done either when the
 * route has no nexthop neighbour.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing: with only the read lock, concurrent lookups could all pass the
 * time check and each send a probe, defeating the rate limit.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int send_probe = 0;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* Claim this probe slot before dropping the lock. */
		neigh->updated = jiffies;
		send_probe = 1;
	}
	write_unlock_bh(&neigh->lock);

	if (!send_probe)
		return;

	/* Solicit the neighbour via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
318
319 /*
320  * Default Router Selection (RFC 2461 6.3.6)
321  */
322 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
323 {
324         struct net_device *dev = rt->rt6i_dev;
325         if (!oif || dev->ifindex == oif)
326                 return 2;
327         if ((dev->flags & IFF_LOOPBACK) &&
328             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329                 return 1;
330         return 0;
331 }
332
333 static inline int rt6_check_neigh(struct rt6_info *rt)
334 {
335         struct neighbour *neigh = rt->rt6i_nexthop;
336         int m = 0;
337         if (rt->rt6i_flags & RTF_NONEXTHOP ||
338             !(rt->rt6i_flags & RTF_GATEWAY))
339                 m = 1;
340         else if (neigh) {
341                 read_lock_bh(&neigh->lock);
342                 if (neigh->nud_state & NUD_VALID)
343                         m = 2;
344                 else if (!(neigh->nud_state & NUD_FAILED))
345                         m = 1;
346                 read_unlock_bh(&neigh->lock);
347         }
348         return m;
349 }
350
351 static int rt6_score_route(struct rt6_info *rt, int oif,
352                            int strict)
353 {
354         int m, n;
355
356         m = rt6_check_dev(rt, oif);
357         if (!m && (strict & RT6_LOOKUP_F_IFACE))
358                 return -1;
359 #ifdef CONFIG_IPV6_ROUTER_PREF
360         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361 #endif
362         n = rt6_check_neigh(rt);
363         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
364                 return -1;
365         return m;
366 }
367
368 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369                                    int *mpri, struct rt6_info *match)
370 {
371         int m;
372
373         if (rt6_check_expired(rt))
374                 goto out;
375
376         m = rt6_score_route(rt, oif, strict);
377         if (m < 0)
378                 goto out;
379
380         if (m > *mpri) {
381                 if (strict & RT6_LOOKUP_F_REACHABLE)
382                         rt6_probe(match);
383                 *mpri = m;
384                 match = rt;
385         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386                 rt6_probe(rt);
387         }
388
389 out:
390         return match;
391 }
392
393 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394                                      struct rt6_info *rr_head,
395                                      u32 metric, int oif, int strict)
396 {
397         struct rt6_info *rt, *match;
398         int mpri = -1;
399
400         match = NULL;
401         for (rt = rr_head; rt && rt->rt6i_metric == metric;
402              rt = rt->u.dst.rt6_next)
403                 match = find_match(rt, oif, strict, &mpri, match);
404         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405              rt = rt->u.dst.rt6_next)
406                 match = find_match(rt, oif, strict, &mpri, match);
407
408         return match;
409 }
410
411 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412 {
413         struct rt6_info *match, *rt0;
414
415         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416                   __FUNCTION__, fn->leaf, oif);
417
418         rt0 = fn->rr_ptr;
419         if (!rt0)
420                 fn->rr_ptr = rt0 = fn->leaf;
421
422         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
423
424         if (!match &&
425             (strict & RT6_LOOKUP_F_REACHABLE)) {
426                 struct rt6_info *next = rt0->u.dst.rt6_next;
427
428                 /* no entries matched; do round-robin */
429                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430                         next = fn->leaf;
431
432                 if (next != rt0)
433                         fn->rr_ptr = next;
434         }
435
436         RT6_TRACE("%s() => %p\n",
437                   __FUNCTION__, match);
438
439         return (match ? match : &ip6_null_entry);
440 }
441
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @dev:    receiving device
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Adds, updates or (for a zero lifetime) deletes the matching route.
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length (RFC 4191, 2.3):
	 * the option length is counted in units of 8 octets including the
	 * 8-octet fixed part, so a prefix longer than 64 bits needs
	 * length 3 and any non-empty prefix needs at least length 2.
	 * Anything shorter would make us read prefix bytes that lie
	 * beyond the end of the option.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 3) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	}

	/* An invalid preference value is treated as medium. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		/* Full 16-byte prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Copy only prefix_len bits into a local buffer. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
519
/*
 * BACKTRACK(saddr): to be expanded inside a lookup function that has
 * local variables `rt` and `fn` and labels `restart` and `out`.
 *
 * When rt is &ip6_null_entry, climb from fn towards the tree root.  At
 * each step, if the parent has a subtree other than fn, look @saddr up
 * in that subtree; otherwise move to the parent.  Jumps to `restart` at
 * the first node flagged RTN_RTINFO, or to `out` on reaching a node
 * flagged RTN_TL_ROOT.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
537
538 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
539                                              struct flowi *fl, int flags)
540 {
541         struct fib6_node *fn;
542         struct rt6_info *rt;
543
544         read_lock_bh(&table->tb6_lock);
545         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
546 restart:
547         rt = fn->leaf;
548         rt = rt6_device_match(rt, fl->oif, flags);
549         BACKTRACK(&fl->fl6_src);
550 out:
551         dst_hold(&rt->u.dst);
552         read_unlock_bh(&table->tb6_lock);
553
554         rt->u.dst.lastuse = jiffies;
555         rt->u.dst.__use++;
556
557         return rt;
558
559 }
560
561 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
562                             int oif, int strict)
563 {
564         struct flowi fl = {
565                 .oif = oif,
566                 .nl_u = {
567                         .ip6_u = {
568                                 .daddr = *daddr,
569                         },
570                 },
571         };
572         struct dst_entry *dst;
573         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
574
575         if (saddr) {
576                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577                 flags |= RT6_LOOKUP_F_HAS_SADDR;
578         }
579
580         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
581         if (dst->error == 0)
582                 return (struct rt6_info *) dst;
583
584         dst_release(dst);
585
586         return NULL;
587 }
588
589 EXPORT_SYMBOL(rt6_lookup);
590
591 /* ip6_ins_rt is called with FREE table->tb6_lock.
592    It takes new route entry, the addition fails by any reason the
593    route is freed. In any case, if caller does not hold it, it may
594    be destroyed.
595  */
596
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
598 {
599         int err;
600         struct fib6_table *table;
601
602         table = rt->rt6i_table;
603         write_lock_bh(&table->tb6_lock);
604         err = fib6_add(&table->tb6_root, rt, info);
605         write_unlock_bh(&table->tb6_lock);
606
607         return err;
608 }
609
610 int ip6_ins_rt(struct rt6_info *rt)
611 {
612         return __ip6_ins_rt(rt, NULL);
613 }
614
615 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616                                       struct in6_addr *saddr)
617 {
618         struct rt6_info *rt;
619
620         /*
621          *      Clone the route.
622          */
623
624         rt = ip6_rt_copy(ort);
625
626         if (rt) {
627                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628                         if (rt->rt6i_dst.plen != 128 &&
629                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630                                 rt->rt6i_flags |= RTF_ANYCAST;
631                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
632                 }
633
634                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
635                 rt->rt6i_dst.plen = 128;
636                 rt->rt6i_flags |= RTF_CACHE;
637                 rt->u.dst.flags |= DST_HOST;
638
639 #ifdef CONFIG_IPV6_SUBTREES
640                 if (rt->rt6i_src.plen && saddr) {
641                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642                         rt->rt6i_src.plen = 128;
643                 }
644 #endif
645
646                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647
648         }
649
650         return rt;
651 }
652
653 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654 {
655         struct rt6_info *rt = ip6_rt_copy(ort);
656         if (rt) {
657                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658                 rt->rt6i_dst.plen = 128;
659                 rt->rt6i_flags |= RTF_CACHE;
660                 rt->u.dst.flags |= DST_HOST;
661                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662         }
663         return rt;
664 }
665
666 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
667                                             struct flowi *fl, int flags)
668 {
669         struct fib6_node *fn;
670         struct rt6_info *rt, *nrt;
671         int strict = 0;
672         int attempts = 3;
673         int err;
674         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
675
676         strict |= flags & RT6_LOOKUP_F_IFACE;
677
678 relookup:
679         read_lock_bh(&table->tb6_lock);
680
681 restart_2:
682         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
683
684 restart:
685         rt = rt6_select(fn, fl->iif, strict | reachable);
686         BACKTRACK(&fl->fl6_src);
687         if (rt == &ip6_null_entry ||
688             rt->rt6i_flags & RTF_CACHE)
689                 goto out;
690
691         dst_hold(&rt->u.dst);
692         read_unlock_bh(&table->tb6_lock);
693
694         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
695                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
696         else {
697 #if CLONE_OFFLINK_ROUTE
698                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
699 #else
700                 goto out2;
701 #endif
702         }
703
704         dst_release(&rt->u.dst);
705         rt = nrt ? : &ip6_null_entry;
706
707         dst_hold(&rt->u.dst);
708         if (nrt) {
709                 err = ip6_ins_rt(nrt);
710                 if (!err)
711                         goto out2;
712         }
713
714         if (--attempts <= 0)
715                 goto out2;
716
717         /*
718          * Race condition! In the gap, when table->tb6_lock was
719          * released someone could insert this route.  Relookup.
720          */
721         dst_release(&rt->u.dst);
722         goto relookup;
723
724 out:
725         if (reachable) {
726                 reachable = 0;
727                 goto restart_2;
728         }
729         dst_hold(&rt->u.dst);
730         read_unlock_bh(&table->tb6_lock);
731 out2:
732         rt->u.dst.lastuse = jiffies;
733         rt->u.dst.__use++;
734
735         return rt;
736 }
737
738 void ip6_route_input(struct sk_buff *skb)
739 {
740         struct ipv6hdr *iph = ipv6_hdr(skb);
741         int flags = RT6_LOOKUP_F_HAS_SADDR;
742         struct flowi fl = {
743                 .iif = skb->dev->ifindex,
744                 .nl_u = {
745                         .ip6_u = {
746                                 .daddr = iph->daddr,
747                                 .saddr = iph->saddr,
748                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
749                         },
750                 },
751                 .mark = skb->mark,
752                 .proto = iph->nexthdr,
753         };
754
755         if (rt6_need_strict(&iph->daddr))
756                 flags |= RT6_LOOKUP_F_IFACE;
757
758         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
759 }
760
761 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
762                                              struct flowi *fl, int flags)
763 {
764         struct fib6_node *fn;
765         struct rt6_info *rt, *nrt;
766         int strict = 0;
767         int attempts = 3;
768         int err;
769         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
770
771         strict |= flags & RT6_LOOKUP_F_IFACE;
772
773 relookup:
774         read_lock_bh(&table->tb6_lock);
775
776 restart_2:
777         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
778
779 restart:
780         rt = rt6_select(fn, fl->oif, strict | reachable);
781         BACKTRACK(&fl->fl6_src);
782         if (rt == &ip6_null_entry ||
783             rt->rt6i_flags & RTF_CACHE)
784                 goto out;
785
786         dst_hold(&rt->u.dst);
787         read_unlock_bh(&table->tb6_lock);
788
789         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
790                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
791         else {
792 #if CLONE_OFFLINK_ROUTE
793                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
794 #else
795                 goto out2;
796 #endif
797         }
798
799         dst_release(&rt->u.dst);
800         rt = nrt ? : &ip6_null_entry;
801
802         dst_hold(&rt->u.dst);
803         if (nrt) {
804                 err = ip6_ins_rt(nrt);
805                 if (!err)
806                         goto out2;
807         }
808
809         if (--attempts <= 0)
810                 goto out2;
811
812         /*
813          * Race condition! In the gap, when table->tb6_lock was
814          * released someone could insert this route.  Relookup.
815          */
816         dst_release(&rt->u.dst);
817         goto relookup;
818
819 out:
820         if (reachable) {
821                 reachable = 0;
822                 goto restart_2;
823         }
824         dst_hold(&rt->u.dst);
825         read_unlock_bh(&table->tb6_lock);
826 out2:
827         rt->u.dst.lastuse = jiffies;
828         rt->u.dst.__use++;
829         return rt;
830 }
831
832 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
833 {
834         int flags = 0;
835
836         if (rt6_need_strict(&fl->fl6_dst))
837                 flags |= RT6_LOOKUP_F_IFACE;
838
839         if (!ipv6_addr_any(&fl->fl6_src))
840                 flags |= RT6_LOOKUP_F_HAS_SADDR;
841
842         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
843 }
844
845 EXPORT_SYMBOL(ip6_route_output);
846
/* Input/output handler of blackhole dsts: free the packet, report success. */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
852
853 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
854 {
855         struct rt6_info *ort = (struct rt6_info *) *dstp;
856         struct rt6_info *rt = (struct rt6_info *)
857                 dst_alloc(&ip6_dst_blackhole_ops);
858         struct dst_entry *new = NULL;
859
860         if (rt) {
861                 new = &rt->u.dst;
862
863                 atomic_set(&new->__refcnt, 1);
864                 new->__use = 1;
865                 new->input = ip6_blackhole_output;
866                 new->output = ip6_blackhole_output;
867
868                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
869                 new->dev = ort->u.dst.dev;
870                 if (new->dev)
871                         dev_hold(new->dev);
872                 rt->rt6i_idev = ort->rt6i_idev;
873                 if (rt->rt6i_idev)
874                         in6_dev_hold(rt->rt6i_idev);
875                 rt->rt6i_expires = 0;
876
877                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
878                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
879                 rt->rt6i_metric = 0;
880
881                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
882 #ifdef CONFIG_IPV6_SUBTREES
883                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
884 #endif
885
886                 dst_free(new);
887         }
888
889         dst_release(*dstp);
890         *dstp = new;
891         return (new ? 0 : -ENOMEM);
892 }
893 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
894
895 /*
896  *      Destination cache support functions
897  */
898
899 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
900 {
901         struct rt6_info *rt;
902
903         rt = (struct rt6_info *) dst;
904
905         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
906                 return dst;
907
908         return NULL;
909 }
910
911 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
912 {
913         struct rt6_info *rt = (struct rt6_info *) dst;
914
915         if (rt) {
916                 if (rt->rt6i_flags & RTF_CACHE)
917                         ip6_del_rt(rt);
918                 else
919                         dst_release(dst);
920         }
921         return NULL;
922 }
923
924 static void ip6_link_failure(struct sk_buff *skb)
925 {
926         struct rt6_info *rt;
927
928         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
929
930         rt = (struct rt6_info *) skb->dst;
931         if (rt) {
932                 if (rt->rt6i_flags&RTF_CACHE) {
933                         dst_set_expires(&rt->u.dst, 0);
934                         rt->rt6i_flags |= RTF_EXPIRES;
935                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
936                         rt->rt6i_node->fn_sernum = -1;
937         }
938 }
939
940 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
941 {
942         struct rt6_info *rt6 = (struct rt6_info*)dst;
943
944         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
945                 rt6->rt6i_flags |= RTF_MODIFIED;
946                 if (mtu < IPV6_MIN_MTU) {
947                         mtu = IPV6_MIN_MTU;
948                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
949                 }
950                 dst->metrics[RTAX_MTU-1] = mtu;
951                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
952         }
953 }
954
/* Forward declaration: the definition appears further down in this file,
 * but ndisc_dst_alloc() already calls it before that point.
 */
955 static int ipv6_get_mtu(struct net_device *dev);
956
957 static inline unsigned int ipv6_advmss(unsigned int mtu)
958 {
959         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
960
961         if (mtu < ip6_rt_min_advmss)
962                 mtu = ip6_rt_min_advmss;
963
964         /*
965          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
966          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
967          * IPV6_MAXPLEN is also valid and means: "any MSS,
968          * rely only on pmtu discovery"
969          */
970         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
971                 mtu = IPV6_MAXPLEN;
972         return mtu;
973 }
974
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by ndisc_dst_alloc().  ndisc_dst_gc() unlinks and frees the entries whose
 * reference count has dropped to zero.  Both functions touch the list only
 * while holding ndisc_lock (with bottom halves disabled).
 */
975 static struct dst_entry *ndisc_dst_gc_list;
976 static DEFINE_SPINLOCK(ndisc_lock);
977
978 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
979                                   struct neighbour *neigh,
980                                   struct in6_addr *addr,
981                                   int (*output)(struct sk_buff *))
982 {
983         struct rt6_info *rt;
984         struct inet6_dev *idev = in6_dev_get(dev);
985
986         if (unlikely(idev == NULL))
987                 return NULL;
988
989         rt = ip6_dst_alloc();
990         if (unlikely(rt == NULL)) {
991                 in6_dev_put(idev);
992                 goto out;
993         }
994
995         dev_hold(dev);
996         if (neigh)
997                 neigh_hold(neigh);
998         else
999                 neigh = ndisc_get_neigh(dev, addr);
1000
1001         rt->rt6i_dev      = dev;
1002         rt->rt6i_idev     = idev;
1003         rt->rt6i_nexthop  = neigh;
1004         atomic_set(&rt->u.dst.__refcnt, 1);
1005         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1006         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1007         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1008         rt->u.dst.output  = output;
1009
1010 #if 0   /* there's no chance to use these for ndisc */
1011         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1012                                 ? DST_HOST
1013                                 : 0;
1014         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1015         rt->rt6i_dst.plen = 128;
1016 #endif
1017
1018         spin_lock_bh(&ndisc_lock);
1019         rt->u.dst.next = ndisc_dst_gc_list;
1020         ndisc_dst_gc_list = &rt->u.dst;
1021         spin_unlock_bh(&ndisc_lock);
1022
1023         fib6_force_start_gc();
1024
1025 out:
1026         return &rt->u.dst;
1027 }
1028
1029 int ndisc_dst_gc(int *more)
1030 {
1031         struct dst_entry *dst, *next, **pprev;
1032         int freed;
1033
1034         next = NULL;
1035         freed = 0;
1036
1037         spin_lock_bh(&ndisc_lock);
1038         pprev = &ndisc_dst_gc_list;
1039
1040         while ((dst = *pprev) != NULL) {
1041                 if (!atomic_read(&dst->__refcnt)) {
1042                         *pprev = dst->next;
1043                         dst_free(dst);
1044                         freed++;
1045                 } else {
1046                         pprev = &dst->next;
1047                         (*more)++;
1048                 }
1049         }
1050
1051         spin_unlock_bh(&ndisc_lock);
1052
1053         return freed;
1054 }
1055
1056 static int ip6_dst_gc(void)
1057 {
1058         static unsigned expire = 30*HZ;
1059         static unsigned long last_gc;
1060         unsigned long now = jiffies;
1061
1062         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1063             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1064                 goto out;
1065
1066         expire++;
1067         fib6_run_gc(expire);
1068         last_gc = now;
1069         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1070                 expire = ip6_rt_gc_timeout>>1;
1071
1072 out:
1073         expire -= expire>>ip6_rt_gc_elasticity;
1074         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1075 }
1076
1077 /* Clean host part of a prefix. Not necessary in radix tree,
1078    but results in cleaner routing tables.
1079
1080    Remove it only when all the things will work!
1081  */
1082
1083 static int ipv6_get_mtu(struct net_device *dev)
1084 {
1085         int mtu = IPV6_MIN_MTU;
1086         struct inet6_dev *idev;
1087
1088         idev = in6_dev_get(dev);
1089         if (idev) {
1090                 mtu = idev->cnf.mtu6;
1091                 in6_dev_put(idev);
1092         }
1093         return mtu;
1094 }
1095
1096 int ipv6_get_hoplimit(struct net_device *dev)
1097 {
1098         int hoplimit = ipv6_devconf.hop_limit;
1099         struct inet6_dev *idev;
1100
1101         idev = in6_dev_get(dev);
1102         if (idev) {
1103                 hoplimit = idev->cnf.hop_limit;
1104                 in6_dev_put(idev);
1105         }
1106         return hoplimit;
1107 }
1108
1109 /*
1110  *
1111  */
1112
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * Steps, in order:
 *  - reject prefix lengths above 128 (and any source prefix when
 *    CONFIG_IPV6_SUBTREES is not set) with -EINVAL;
 *  - if cfg->fc_ifindex is set, look the device up in init_net and take
 *    references on it and on its inet6_dev (-ENODEV on failure);
 *  - default cfg->fc_metric to IP6_RT_PRIO_USER and cfg->fc_protocol to
 *    RTPROT_BOOT when unset (note: these defaults are written back to @cfg);
 *  - allocate the route and fill in handlers, destination/source keys,
 *    metric and expiry;
 *  - RTF_REJECT routes, and non-loopback destinations routed via a loopback
 *    device, become reject routes bound to init_net.loopback_dev;
 *  - for RTF_GATEWAY routes validate the gateway address, possibly deriving
 *    the device from the route to the gateway;
 *  - copy metrics from the cfg->fc_mx netlink attributes and fill in
 *    defaults for hop limit, MTU and ADVMSS.
 *
 * Returns the result of __ip6_ins_rt() on the success path, otherwise a
 * negative errno.  On the error path the device / inet6_dev references
 * taken here are dropped and the not-yet-inserted route is freed.
 */
1113 int ip6_route_add(struct fib6_config *cfg)
1114 {
1115         int err;
1116         struct rt6_info *rt = NULL;
1117         struct net_device *dev = NULL;
1118         struct inet6_dev *idev = NULL;
1119         struct fib6_table *table;
1120         int addr_type;
1121
1122         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1123                 return -EINVAL;
1124 #ifndef CONFIG_IPV6_SUBTREES
1125         if (cfg->fc_src_len)
1126                 return -EINVAL;
1127 #endif
             /* Explicit interface: hold both the device and its inet6_dev. */
1128         if (cfg->fc_ifindex) {
1129                 err = -ENODEV;
1130                 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1131                 if (!dev)
1132                         goto out;
1133                 idev = in6_dev_get(dev);
1134                 if (!idev)
1135                         goto out;
1136         }
1137
1138         if (cfg->fc_metric == 0)
1139                 cfg->fc_metric = IP6_RT_PRIO_USER;
1140
1141         table = fib6_new_table(cfg->fc_table);
1142         if (table == NULL) {
1143                 err = -ENOBUFS;
1144                 goto out;
1145         }
1146
1147         rt = ip6_dst_alloc();
1148
1149         if (rt == NULL) {
1150                 err = -ENOMEM;
1151                 goto out;
1152         }
1153
1154         rt->u.dst.obsolete = -1;
1155         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1156
1157         if (cfg->fc_protocol == RTPROT_UNSPEC)
1158                 cfg->fc_protocol = RTPROT_BOOT;
1159         rt->rt6i_protocol = cfg->fc_protocol;
1160
1161         addr_type = ipv6_addr_type(&cfg->fc_dst);
1162
1163         if (addr_type & IPV6_ADDR_MULTICAST)
1164                 rt->u.dst.input = ip6_mc_input;
1165         else
1166                 rt->u.dst.input = ip6_forward;
1167
1168         rt->u.dst.output = ip6_output;
1169
1170         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1171         rt->rt6i_dst.plen = cfg->fc_dst_len;
1172         if (rt->rt6i_dst.plen == 128)
1173                rt->u.dst.flags = DST_HOST;
1174
1175 #ifdef CONFIG_IPV6_SUBTREES
1176         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1177         rt->rt6i_src.plen = cfg->fc_src_len;
1178 #endif
1179
1180         rt->rt6i_metric = cfg->fc_metric;
1181
1182         /* We cannot add true routes via loopback here,
1183            they would result in kernel looping; promote them to reject routes
1184          */
1185         if ((cfg->fc_flags & RTF_REJECT) ||
1186             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1187                 /* hold loopback dev/idev if we haven't done so. */
1188                 if (dev != init_net.loopback_dev) {
1189                         if (dev) {
1190                                 dev_put(dev);
1191                                 in6_dev_put(idev);
1192                         }
1193                         dev = init_net.loopback_dev;
1194                         dev_hold(dev);
1195                         idev = in6_dev_get(dev);
1196                         if (!idev) {
1197                                 err = -ENODEV;
1198                                 goto out;
1199                         }
1200                 }
1201                 rt->u.dst.output = ip6_pkt_discard_out;
1202                 rt->u.dst.input = ip6_pkt_discard;
1203                 rt->u.dst.error = -ENETUNREACH;
1204                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1205                 goto install_route;
1206         }
1207
1208         if (cfg->fc_flags & RTF_GATEWAY) {
1209                 struct in6_addr *gw_addr;
1210                 int gwa_type;
1211
1212                 gw_addr = &cfg->fc_gateway;
1213                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1214                 gwa_type = ipv6_addr_type(gw_addr);
1215
1216                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1217                         struct rt6_info *grt;
1218
1219                         /* IPv6 strictly inhibits using not link-local
1220                            addresses as nexthop address.
1221                            Otherwise, router will not able to send redirects.
1222                            It is very good, but in some (rare!) circumstances
1223                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1224                            some exceptions. --ANK
1225                          */
1226                         err = -EINVAL;
1227                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1228                                 goto out;
1229
                             /* Look up the existing route towards the gateway itself. */
1230                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1231
1232                         err = -EHOSTUNREACH;
1233                         if (grt == NULL)
1234                                 goto out;
1235                         if (dev) {
1236                                 if (dev != grt->rt6i_dev) {
1237                                         dst_release(&grt->u.dst);
1238                                         goto out;
1239                                 }
1240                         } else {
                                     /* No device given: inherit it (and its idev) from
                                        the gateway route, taking our own references. */
1241                                 dev = grt->rt6i_dev;
1242                                 idev = grt->rt6i_idev;
1243                                 dev_hold(dev);
1244                                 in6_dev_hold(grt->rt6i_idev);
1245                         }
                             /* err is still -EHOSTUNREACH here; it is cleared only when
                                the route to the gateway is not itself a gateway route. */
1246                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1247                                 err = 0;
1248                         dst_release(&grt->u.dst);
1249
1250                         if (err)
1251                                 goto out;
1252                 }
1253                 err = -EINVAL;
1254                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1255                         goto out;
1256         }
1257
1258         err = -ENODEV;
1259         if (dev == NULL)
1260                 goto out;
1261
1262         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1263                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1264                 if (IS_ERR(rt->rt6i_nexthop)) {
1265                         err = PTR_ERR(rt->rt6i_nexthop);
1266                         rt->rt6i_nexthop = NULL;
1267                         goto out;
1268                 }
1269         }
1270
1271         rt->rt6i_flags = cfg->fc_flags;
1272
1273 install_route:
1274         if (cfg->fc_mx) {
1275                 struct nlattr *nla;
1276                 int remaining;
1277
1278                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1279                         int type = nla_type(nla);
1280
                             /* Attribute type N is stored in metrics[N - 1];
                                type 0 is skipped, types above RTAX_MAX are rejected. */
1281                         if (type) {
1282                                 if (type > RTAX_MAX) {
1283                                         err = -EINVAL;
1284                                         goto out;
1285                                 }
1286
1287                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1288                         }
1289                 }
1290         }
1291
             /* Fill in defaults for metrics the caller did not supply. */
1292         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1293                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1294         if (!rt->u.dst.metrics[RTAX_MTU-1])
1295                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1296         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1297                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
             /* The dev/idev references taken above are handed over to the route. */
1298         rt->u.dst.dev = dev;
1299         rt->rt6i_idev = idev;
1300         rt->rt6i_table = table;
1301         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1302
             /* Error path: drop the references taken so far and free the
                route, which has not been inserted anywhere yet. */
1303 out:
1304         if (dev)
1305                 dev_put(dev);
1306         if (idev)
1307                 in6_dev_put(idev);
1308         if (rt)
1309                 dst_free(&rt->u.dst);
1310         return err;
1311 }
1312
1313 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1314 {
1315         int err;
1316         struct fib6_table *table;
1317
1318         if (rt == &ip6_null_entry)
1319                 return -ENOENT;
1320
1321         table = rt->rt6i_table;
1322         write_lock_bh(&table->tb6_lock);
1323
1324         err = fib6_del(rt, info);
1325         dst_release(&rt->u.dst);
1326
1327         write_unlock_bh(&table->tb6_lock);
1328
1329         return err;
1330 }
1331
1332 int ip6_del_rt(struct rt6_info *rt)
1333 {
1334         return __ip6_del_rt(rt, NULL);
1335 }
1336
1337 static int ip6_route_del(struct fib6_config *cfg)
1338 {
1339         struct fib6_table *table;
1340         struct fib6_node *fn;
1341         struct rt6_info *rt;
1342         int err = -ESRCH;
1343
1344         table = fib6_get_table(cfg->fc_table);
1345         if (table == NULL)
1346                 return err;
1347
1348         read_lock_bh(&table->tb6_lock);
1349
1350         fn = fib6_locate(&table->tb6_root,
1351                          &cfg->fc_dst, cfg->fc_dst_len,
1352                          &cfg->fc_src, cfg->fc_src_len);
1353
1354         if (fn) {
1355                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1356                         if (cfg->fc_ifindex &&
1357                             (rt->rt6i_dev == NULL ||
1358                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1359                                 continue;
1360                         if (cfg->fc_flags & RTF_GATEWAY &&
1361                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1362                                 continue;
1363                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1364                                 continue;
1365                         dst_hold(&rt->u.dst);
1366                         read_unlock_bh(&table->tb6_lock);
1367
1368                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1369                 }
1370         }
1371         read_unlock_bh(&table->tb6_lock);
1372
1373         return err;
1374 }
1375
1376 /*
1377  *      Handle redirects
1378  */
/*
 * Flow key extended with the address of the router that sent a redirect.
 * `fl` must remain the first member: ip6_route_redirect() hands this
 * structure to fib6_rule_lookup() as a struct flowi *, and
 * __ip6_route_redirect() casts that pointer back to struct ip6rd_flowi *
 * to reach `gateway`.
 */
1379 struct ip6rd_flowi {
1380         struct flowi fl;
1381         struct in6_addr gateway;
1382 };
1383
/*
 * Per-table lookup callback used when validating a redirect.
 *
 * @fl is really the `fl` member of a struct ip6rd_flowi, so it is cast back
 * to reach the redirecting router's address.  Under the table read lock the
 * routes of the node found by fib6_lookup() are scanned for one that is not
 * expired, is a gateway route, leaves through fl->oif and whose gateway is
 * that router.  If none is found, ip6_null_entry is used.
 *
 * The returned route always has a reference taken with dst_hold().
 *
 * NOTE(review): the `restart` and `out` labels are not referenced directly
 * in this body; presumably the BACKTRACK() macro (defined elsewhere) jumps
 * to them and also relies on the local names `fn` and `rt` -- confirm
 * before renaming anything here.  @flags is not referenced directly in
 * this body.
 */
1384 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1385                                              struct flowi *fl,
1386                                              int flags)
1387 {
1388         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1389         struct rt6_info *rt;
1390         struct fib6_node *fn;
1391
1392         /*
1393          * Get the "current" route for this destination and
1394          * check if the redirect has come from appropriate router.
1395          *
1396          * RFC 2461 specifies that redirects should only be
1397          * accepted if they come from the nexthop to the target.
1398          * Due to the way the routes are chosen, this notion
1399          * is a bit fuzzy and one might need to check all possible
1400          * routes.
1401          */
1402
1403         read_lock_bh(&table->tb6_lock);
1404         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1405 restart:
1406         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1407                 /*
1408                  * Current route is on-link; redirect is always invalid.
1409                  *
1410                  * Seems, previous statement is not true. It could
1411                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1412                  * But then router serving it might decide, that we should
1413                  * know truth 8)8) --ANK (980726).
1414                  */
1415                 if (rt6_check_expired(rt))
1416                         continue;
1417                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1418                         continue;
1419                 if (fl->oif != rt->rt6i_dev->ifindex)
1420                         continue;
1421                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1422                         continue;
1423                 break;
1424         }
1425
             /* Loop ran off the end of the list: no acceptable route on this node. */
1426         if (!rt)
1427                 rt = &ip6_null_entry;
1428         BACKTRACK(&fl->fl6_src);
1429 out:
1430         dst_hold(&rt->u.dst);
1431
1432         read_unlock_bh(&table->tb6_lock);
1433
1434         return rt;
1435 };
1436
1437 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1438                                            struct in6_addr *src,
1439                                            struct in6_addr *gateway,
1440                                            struct net_device *dev)
1441 {
1442         int flags = RT6_LOOKUP_F_HAS_SADDR;
1443         struct ip6rd_flowi rdfl = {
1444                 .fl = {
1445                         .oif = dev->ifindex,
1446                         .nl_u = {
1447                                 .ip6_u = {
1448                                         .daddr = *dest,
1449                                         .saddr = *src,
1450                                 },
1451                         },
1452                 },
1453                 .gateway = *gateway,
1454         };
1455
1456         if (rt6_need_strict(dest))
1457                 flags |= RT6_LOOKUP_F_IFACE;
1458
1459         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1460 }
1461
1462 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1463                   struct in6_addr *saddr,
1464                   struct neighbour *neigh, u8 *lladdr, int on_link)
1465 {
1466         struct rt6_info *rt, *nrt = NULL;
1467         struct netevent_redirect netevent;
1468
1469         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1470
1471         if (rt == &ip6_null_entry) {
1472                 if (net_ratelimit())
1473                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1474                                "for redirect target\n");
1475                 goto out;
1476         }
1477
1478         /*
1479          *      We have finally decided to accept it.
1480          */
1481
1482         neigh_update(neigh, lladdr, NUD_STALE,
1483                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1484                      NEIGH_UPDATE_F_OVERRIDE|
1485                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1486                                      NEIGH_UPDATE_F_ISROUTER))
1487                      );
1488
1489         /*
1490          * Redirect received -> path was valid.
1491          * Look, redirects are sent only in response to data packets,
1492          * so that this nexthop apparently is reachable. --ANK
1493          */
1494         dst_confirm(&rt->u.dst);
1495
1496         /* Duplicate redirect: silently ignore. */
1497         if (neigh == rt->u.dst.neighbour)
1498                 goto out;
1499
1500         nrt = ip6_rt_copy(rt);
1501         if (nrt == NULL)
1502                 goto out;
1503
1504         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1505         if (on_link)
1506                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1507
1508         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1509         nrt->rt6i_dst.plen = 128;
1510         nrt->u.dst.flags |= DST_HOST;
1511
1512         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1513         nrt->rt6i_nexthop = neigh_clone(neigh);
1514         /* Reset pmtu, it may be better */
1515         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1516         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1517
1518         if (ip6_ins_rt(nrt))
1519                 goto out;
1520
1521         netevent.old = &rt->u.dst;
1522         netevent.new = &nrt->u.dst;
1523         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1524
1525         if (rt->rt6i_flags&RTF_CACHE) {
1526                 ip6_del_rt(rt);
1527                 return;
1528         }
1529
1530 out:
1531         dst_release(&rt->u.dst);
1532         return;
1533 }
1534
1535 /*
1536  *      Handle ICMP "packet too big" messages
1537  *      i.e. Path MTU discovery
1538  */
1539
1540 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1541                         struct net_device *dev, u32 pmtu)
1542 {
1543         struct rt6_info *rt, *nrt;
1544         int allfrag = 0;
1545
1546         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1547         if (rt == NULL)
1548                 return;
1549
1550         if (pmtu >= dst_mtu(&rt->u.dst))
1551                 goto out;
1552
1553         if (pmtu < IPV6_MIN_MTU) {
1554                 /*
1555                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1556                  * MTU (1280) and a fragment header should always be included
1557                  * after a node receiving Too Big message reporting PMTU is
1558                  * less than the IPv6 Minimum Link MTU.
1559                  */
1560                 pmtu = IPV6_MIN_MTU;
1561                 allfrag = 1;
1562         }
1563
1564         /* New mtu received -> path was valid.
1565            They are sent only in response to data packets,
1566            so that this nexthop apparently is reachable. --ANK
1567          */
1568         dst_confirm(&rt->u.dst);
1569
1570         /* Host route. If it is static, it would be better
1571            not to override it, but add new one, so that
1572            when cache entry will expire old pmtu
1573            would return automatically.
1574          */
1575         if (rt->rt6i_flags & RTF_CACHE) {
1576                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1577                 if (allfrag)
1578                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1579                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1580                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1581                 goto out;
1582         }
1583
1584         /* Network route.
1585            Two cases are possible:
1586            1. It is connected route. Action: COW
1587            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1588          */
1589         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1590                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1591         else
1592                 nrt = rt6_alloc_clone(rt, daddr);
1593
1594         if (nrt) {
1595                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1596                 if (allfrag)
1597                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1598
1599                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1600                  * happened within 5 mins, the recommended timer is 10 mins.
1601                  * Here this route expiration time is set to ip6_rt_mtu_expires
1602                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1603                  * and detecting PMTU increase will be automatically happened.
1604                  */
1605                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1606                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1607
1608                 ip6_ins_rt(nrt);
1609         }
1610 out:
1611         dst_release(&rt->u.dst);
1612 }
1613
1614 /*
1615  *      Misc support functions
1616  */
1617
1618 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1619 {
1620         struct rt6_info *rt = ip6_dst_alloc();
1621
1622         if (rt) {
1623                 rt->u.dst.input = ort->u.dst.input;
1624                 rt->u.dst.output = ort->u.dst.output;
1625
1626                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1627                 rt->u.dst.error = ort->u.dst.error;
1628                 rt->u.dst.dev = ort->u.dst.dev;
1629                 if (rt->u.dst.dev)
1630                         dev_hold(rt->u.dst.dev);
1631                 rt->rt6i_idev = ort->rt6i_idev;
1632                 if (rt->rt6i_idev)
1633                         in6_dev_hold(rt->rt6i_idev);
1634                 rt->u.dst.lastuse = jiffies;
1635                 rt->rt6i_expires = 0;
1636
1637                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1638                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1639                 rt->rt6i_metric = 0;
1640
1641                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1642 #ifdef CONFIG_IPV6_SUBTREES
1643                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1644 #endif
1645                 rt->rt6i_table = ort->rt6i_table;
1646         }
1647         return rt;
1648 }
1649
1650 #ifdef CONFIG_IPV6_ROUTE_INFO
1651 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1652                                            struct in6_addr *gwaddr, int ifindex)
1653 {
1654         struct fib6_node *fn;
1655         struct rt6_info *rt = NULL;
1656         struct fib6_table *table;
1657
1658         table = fib6_get_table(RT6_TABLE_INFO);
1659         if (table == NULL)
1660                 return NULL;
1661
1662         write_lock_bh(&table->tb6_lock);
1663         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1664         if (!fn)
1665                 goto out;
1666
1667         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1668                 if (rt->rt6i_dev->ifindex != ifindex)
1669                         continue;
1670                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1671                         continue;
1672                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1673                         continue;
1674                 dst_hold(&rt->u.dst);
1675                 break;
1676         }
1677 out:
1678         write_unlock_bh(&table->tb6_lock);
1679         return rt;
1680 }
1681
1682 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1683                                            struct in6_addr *gwaddr, int ifindex,
1684                                            unsigned pref)
1685 {
1686         struct fib6_config cfg = {
1687                 .fc_table       = RT6_TABLE_INFO,
1688                 .fc_metric      = 1024,
1689                 .fc_ifindex     = ifindex,
1690                 .fc_dst_len     = prefixlen,
1691                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1692                                   RTF_UP | RTF_PREF(pref),
1693         };
1694
1695         ipv6_addr_copy(&cfg.fc_dst, prefix);
1696         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1697
1698         /* We should treat it as a default route if prefix length is 0. */
1699         if (!prefixlen)
1700                 cfg.fc_flags |= RTF_DEFAULT;
1701
1702         ip6_route_add(&cfg);
1703
1704         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1705 }
1706 #endif
1707
1708 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1709 {
1710         struct rt6_info *rt;
1711         struct fib6_table *table;
1712
1713         table = fib6_get_table(RT6_TABLE_DFLT);
1714         if (table == NULL)
1715                 return NULL;
1716
1717         write_lock_bh(&table->tb6_lock);
1718         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1719                 if (dev == rt->rt6i_dev &&
1720                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1721                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1722                         break;
1723         }
1724         if (rt)
1725                 dst_hold(&rt->u.dst);
1726         write_unlock_bh(&table->tb6_lock);
1727         return rt;
1728 }
1729
1730 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1731                                      struct net_device *dev,
1732                                      unsigned int pref)
1733 {
1734         struct fib6_config cfg = {
1735                 .fc_table       = RT6_TABLE_DFLT,
1736                 .fc_metric      = 1024,
1737                 .fc_ifindex     = dev->ifindex,
1738                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1739                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1740         };
1741
1742         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1743
1744         ip6_route_add(&cfg);
1745
1746         return rt6_get_dflt_router(gwaddr, dev);
1747 }
1748
1749 void rt6_purge_dflt_routers(void)
1750 {
1751         struct rt6_info *rt;
1752         struct fib6_table *table;
1753
1754         /* NOTE: Keep consistent with rt6_get_dflt_router */
1755         table = fib6_get_table(RT6_TABLE_DFLT);
1756         if (table == NULL)
1757                 return;
1758
1759 restart:
1760         read_lock_bh(&table->tb6_lock);
1761         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1762                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1763                         dst_hold(&rt->u.dst);
1764                         read_unlock_bh(&table->tb6_lock);
1765                         ip6_del_rt(rt);
1766                         goto restart;
1767                 }
1768         }
1769         read_unlock_bh(&table->tb6_lock);
1770 }
1771
1772 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1773                                  struct fib6_config *cfg)
1774 {
1775         memset(cfg, 0, sizeof(*cfg));
1776
1777         cfg->fc_table = RT6_TABLE_MAIN;
1778         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1779         cfg->fc_metric = rtmsg->rtmsg_metric;
1780         cfg->fc_expires = rtmsg->rtmsg_info;
1781         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1782         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1783         cfg->fc_flags = rtmsg->rtmsg_flags;
1784
1785         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1786         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1787         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1788 }
1789
1790 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1791 {
1792         struct fib6_config cfg;
1793         struct in6_rtmsg rtmsg;
1794         int err;
1795
1796         switch(cmd) {
1797         case SIOCADDRT:         /* Add a route */
1798         case SIOCDELRT:         /* Delete a route */
1799                 if (!capable(CAP_NET_ADMIN))
1800                         return -EPERM;
1801                 err = copy_from_user(&rtmsg, arg,
1802                                      sizeof(struct in6_rtmsg));
1803                 if (err)
1804                         return -EFAULT;
1805
1806                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1807
1808                 rtnl_lock();
1809                 switch (cmd) {
1810                 case SIOCADDRT:
1811                         err = ip6_route_add(&cfg);
1812                         break;
1813                 case SIOCDELRT:
1814                         err = ip6_route_del(&cfg);
1815                         break;
1816                 default:
1817                         err = -EINVAL;
1818                 }
1819                 rtnl_unlock();
1820
1821                 return err;
1822         }
1823
1824         return -EINVAL;
1825 }
1826
1827 /*
1828  *      Drop the packet on the floor
1829  */
1830
1831 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1832                                int ipstats_mib_noroutes)
1833 {
1834         int type;
1835         switch (ipstats_mib_noroutes) {
1836         case IPSTATS_MIB_INNOROUTES:
1837                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1838                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1839                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1840                         break;
1841                 }
1842                 /* FALLTHROUGH */
1843         case IPSTATS_MIB_OUTNOROUTES:
1844                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1845                 break;
1846         }
1847         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1848         kfree_skb(skb);
1849         return 0;
1850 }
1851
1852 static int ip6_pkt_discard(struct sk_buff *skb)
1853 {
1854         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1855 }
1856
1857 static int ip6_pkt_discard_out(struct sk_buff *skb)
1858 {
1859         skb->dev = skb->dst->dev;
1860         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1861 }
1862
1863 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1864
1865 static int ip6_pkt_prohibit(struct sk_buff *skb)
1866 {
1867         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1868 }
1869
1870 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1871 {
1872         skb->dev = skb->dst->dev;
1873         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1874 }
1875
/* Blackhole handler: free the packet, with no ICMP error and no counter. */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	const int handled = 0;

	kfree_skb(skb);
	return handled;
}
1881
1882 #endif
1883
1884 /*
1885  *      Allocate a dst for local (unicast / anycast) address.
1886  */
1887
1888 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1889                                     const struct in6_addr *addr,
1890                                     int anycast)
1891 {
1892         struct rt6_info *rt = ip6_dst_alloc();
1893
1894         if (rt == NULL)
1895                 return ERR_PTR(-ENOMEM);
1896
1897         dev_hold(init_net.loopback_dev);
1898         in6_dev_hold(idev);
1899
1900         rt->u.dst.flags = DST_HOST;
1901         rt->u.dst.input = ip6_input;
1902         rt->u.dst.output = ip6_output;
1903         rt->rt6i_dev = init_net.loopback_dev;
1904         rt->rt6i_idev = idev;
1905         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1906         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1907         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1908         rt->u.dst.obsolete = -1;
1909
1910         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1911         if (anycast)
1912                 rt->rt6i_flags |= RTF_ANYCAST;
1913         else
1914                 rt->rt6i_flags |= RTF_LOCAL;
1915         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1916         if (rt->rt6i_nexthop == NULL) {
1917                 dst_free(&rt->u.dst);
1918                 return ERR_PTR(-ENOMEM);
1919         }
1920
1921         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1922         rt->rt6i_dst.plen = 128;
1923         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1924
1925         atomic_set(&rt->u.dst.__refcnt, 1);
1926
1927         return rt;
1928 }
1929
1930 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1931 {
1932         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1933             rt != &ip6_null_entry) {
1934                 RT6_TRACE("deleted by ifdown %p\n", rt);
1935                 return -1;
1936         }
1937         return 0;
1938 }
1939
1940 void rt6_ifdown(struct net_device *dev)
1941 {
1942         fib6_clean_all(fib6_ifdown, 0, dev);
1943 }
1944
/* Argument bundle passed through fib6_clean_all() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
1950
1951 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1952 {
1953         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1954         struct inet6_dev *idev;
1955
1956         /* In IPv6 pmtu discovery is not optional,
1957            so that RTAX_MTU lock cannot disable it.
1958            We still use this lock to block changes
1959            caused by addrconf/ndisc.
1960         */
1961
1962         idev = __in6_dev_get(arg->dev);
1963         if (idev == NULL)
1964                 return 0;
1965
1966         /* For administrative MTU increase, there is no way to discover
1967            IPv6 PMTU increase, so PMTU increase should be updated here.
1968            Since RFC 1981 doesn't include administrative MTU increase
1969            update PMTU increase is a MUST. (i.e. jumbo frame)
1970          */
1971         /*
1972            If new MTU is less than route PMTU, this new MTU will be the
1973            lowest MTU in the path, update the route PMTU to reflect PMTU
1974            decreases; if new MTU is greater than route PMTU, and the
1975            old MTU is the lowest MTU in the path, update the route PMTU
1976            to reflect the increase. In this case if the other nodes' MTU
1977            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1978            PMTU discouvery.
1979          */
1980         if (rt->rt6i_dev == arg->dev &&
1981             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1982             (dst_mtu(&rt->u.dst) > arg->mtu ||
1983              (dst_mtu(&rt->u.dst) < arg->mtu &&
1984               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1985                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1986                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1987         }
1988         return 0;
1989 }
1990
1991 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1992 {
1993         struct rt6_mtu_change_arg arg = {
1994                 .dev = dev,
1995                 .mtu = mtu,
1996         };
1997
1998         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1999 }
2000
2001 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2002         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2003         [RTA_OIF]               = { .type = NLA_U32 },
2004         [RTA_IIF]               = { .type = NLA_U32 },
2005         [RTA_PRIORITY]          = { .type = NLA_U32 },
2006         [RTA_METRICS]           = { .type = NLA_NESTED },
2007 };
2008
2009 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2010                               struct fib6_config *cfg)
2011 {
2012         struct rtmsg *rtm;
2013         struct nlattr *tb[RTA_MAX+1];
2014         int err;
2015
2016         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2017         if (err < 0)
2018                 goto errout;
2019
2020         err = -EINVAL;
2021         rtm = nlmsg_data(nlh);
2022         memset(cfg, 0, sizeof(*cfg));
2023
2024         cfg->fc_table = rtm->rtm_table;
2025         cfg->fc_dst_len = rtm->rtm_dst_len;
2026         cfg->fc_src_len = rtm->rtm_src_len;
2027         cfg->fc_flags = RTF_UP;
2028         cfg->fc_protocol = rtm->rtm_protocol;
2029
2030         if (rtm->rtm_type == RTN_UNREACHABLE)
2031                 cfg->fc_flags |= RTF_REJECT;
2032
2033         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2034         cfg->fc_nlinfo.nlh = nlh;
2035
2036         if (tb[RTA_GATEWAY]) {
2037                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2038                 cfg->fc_flags |= RTF_GATEWAY;
2039         }
2040
2041         if (tb[RTA_DST]) {
2042                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2043
2044                 if (nla_len(tb[RTA_DST]) < plen)
2045                         goto errout;
2046
2047                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2048         }
2049
2050         if (tb[RTA_SRC]) {
2051                 int plen = (rtm->rtm_src_len + 7) >> 3;
2052
2053                 if (nla_len(tb[RTA_SRC]) < plen)
2054                         goto errout;
2055
2056                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2057         }
2058
2059         if (tb[RTA_OIF])
2060                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2061
2062         if (tb[RTA_PRIORITY])
2063                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2064
2065         if (tb[RTA_METRICS]) {
2066                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2067                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2068         }
2069
2070         if (tb[RTA_TABLE])
2071                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2072
2073         err = 0;
2074 errout:
2075         return err;
2076 }
2077
2078 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2079 {
2080         struct fib6_config cfg;
2081         int err;
2082
2083         err = rtm_to_fib6_config(skb, nlh, &cfg);
2084         if (err < 0)
2085                 return err;
2086
2087         return ip6_route_del(&cfg);
2088 }
2089
2090 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2091 {
2092         struct fib6_config cfg;
2093         int err;
2094
2095         err = rtm_to_fib6_config(skb, nlh, &cfg);
2096         if (err < 0)
2097                 return err;
2098
2099         return ip6_route_add(&cfg);
2100 }
2101
2102 static inline size_t rt6_nlmsg_size(void)
2103 {
2104         return NLMSG_ALIGN(sizeof(struct rtmsg))
2105                + nla_total_size(16) /* RTA_SRC */
2106                + nla_total_size(16) /* RTA_DST */
2107                + nla_total_size(16) /* RTA_GATEWAY */
2108                + nla_total_size(16) /* RTA_PREFSRC */
2109                + nla_total_size(4) /* RTA_TABLE */
2110                + nla_total_size(4) /* RTA_IIF */
2111                + nla_total_size(4) /* RTA_OIF */
2112                + nla_total_size(4) /* RTA_PRIORITY */
2113                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2114                + nla_total_size(sizeof(struct rta_cacheinfo));
2115 }
2116
2117 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2118                          struct in6_addr *dst, struct in6_addr *src,
2119                          int iif, int type, u32 pid, u32 seq,
2120                          int prefix, unsigned int flags)
2121 {
2122         struct rtmsg *rtm;
2123         struct nlmsghdr *nlh;
2124         long expires;
2125         u32 table;
2126
2127         if (prefix) {   /* user wants prefix routes only */
2128                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2129                         /* success since this is not a prefix route */
2130                         return 1;
2131                 }
2132         }
2133
2134         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2135         if (nlh == NULL)
2136                 return -EMSGSIZE;
2137
2138         rtm = nlmsg_data(nlh);
2139         rtm->rtm_family = AF_INET6;
2140         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2141         rtm->rtm_src_len = rt->rt6i_src.plen;
2142         rtm->rtm_tos = 0;
2143         if (rt->rt6i_table)
2144                 table = rt->rt6i_table->tb6_id;
2145         else
2146                 table = RT6_TABLE_UNSPEC;
2147         rtm->rtm_table = table;
2148         NLA_PUT_U32(skb, RTA_TABLE, table);
2149         if (rt->rt6i_flags&RTF_REJECT)
2150                 rtm->rtm_type = RTN_UNREACHABLE;
2151         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2152                 rtm->rtm_type = RTN_LOCAL;
2153         else
2154                 rtm->rtm_type = RTN_UNICAST;
2155         rtm->rtm_flags = 0;
2156         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2157         rtm->rtm_protocol = rt->rt6i_protocol;
2158         if (rt->rt6i_flags&RTF_DYNAMIC)
2159                 rtm->rtm_protocol = RTPROT_REDIRECT;
2160         else if (rt->rt6i_flags & RTF_ADDRCONF)
2161                 rtm->rtm_protocol = RTPROT_KERNEL;
2162         else if (rt->rt6i_flags&RTF_DEFAULT)
2163                 rtm->rtm_protocol = RTPROT_RA;
2164
2165         if (rt->rt6i_flags&RTF_CACHE)
2166                 rtm->rtm_flags |= RTM_F_CLONED;
2167
2168         if (dst) {
2169                 NLA_PUT(skb, RTA_DST, 16, dst);
2170                 rtm->rtm_dst_len = 128;
2171         } else if (rtm->rtm_dst_len)
2172                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2173 #ifdef CONFIG_IPV6_SUBTREES
2174         if (src) {
2175                 NLA_PUT(skb, RTA_SRC, 16, src);
2176                 rtm->rtm_src_len = 128;
2177         } else if (rtm->rtm_src_len)
2178                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2179 #endif
2180         if (iif)
2181                 NLA_PUT_U32(skb, RTA_IIF, iif);
2182         else if (dst) {
2183                 struct in6_addr saddr_buf;
2184                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2185                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2186         }
2187
2188         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2189                 goto nla_put_failure;
2190
2191         if (rt->u.dst.neighbour)
2192                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2193
2194         if (rt->u.dst.dev)
2195                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2196
2197         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2198
2199         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2200         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2201                                expires, rt->u.dst.error) < 0)
2202                 goto nla_put_failure;
2203
2204         return nlmsg_end(skb, nlh);
2205
2206 nla_put_failure:
2207         nlmsg_cancel(skb, nlh);
2208         return -EMSGSIZE;
2209 }
2210
2211 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2212 {
2213         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2214         int prefix;
2215
2216         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2217                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2218                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2219         } else
2220                 prefix = 0;
2221
2222         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2223                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2224                      prefix, NLM_F_MULTI);
2225 }
2226
2227 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2228 {
2229         struct nlattr *tb[RTA_MAX+1];
2230         struct rt6_info *rt;
2231         struct sk_buff *skb;
2232         struct rtmsg *rtm;
2233         struct flowi fl;
2234         int err, iif = 0;
2235
2236         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2237         if (err < 0)
2238                 goto errout;
2239
2240         err = -EINVAL;
2241         memset(&fl, 0, sizeof(fl));
2242
2243         if (tb[RTA_SRC]) {
2244                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2245                         goto errout;
2246
2247                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2248         }
2249
2250         if (tb[RTA_DST]) {
2251                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2252                         goto errout;
2253
2254                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2255         }
2256
2257         if (tb[RTA_IIF])
2258                 iif = nla_get_u32(tb[RTA_IIF]);
2259
2260         if (tb[RTA_OIF])
2261                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2262
2263         if (iif) {
2264                 struct net_device *dev;
2265                 dev = __dev_get_by_index(&init_net, iif);
2266                 if (!dev) {
2267                         err = -ENODEV;
2268                         goto errout;
2269                 }
2270         }
2271
2272         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2273         if (skb == NULL) {
2274                 err = -ENOBUFS;
2275                 goto errout;
2276         }
2277
2278         /* Reserve room for dummy headers, this skb can pass
2279            through good chunk of routing engine.
2280          */
2281         skb_reset_mac_header(skb);
2282         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2283
2284         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2285         skb->dst = &rt->u.dst;
2286
2287         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2288                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2289                             nlh->nlmsg_seq, 0, 0);
2290         if (err < 0) {
2291                 kfree_skb(skb);
2292                 goto errout;
2293         }
2294
2295         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2296 errout:
2297         return err;
2298 }
2299
2300 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2301 {
2302         struct sk_buff *skb;
2303         u32 pid = 0, seq = 0;
2304         struct nlmsghdr *nlh = NULL;
2305         int err = -ENOBUFS;
2306
2307         if (info) {
2308                 pid = info->pid;
2309                 nlh = info->nlh;
2310                 if (nlh)
2311                         seq = nlh->nlmsg_seq;
2312         }
2313
2314         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2315         if (skb == NULL)
2316                 goto errout;
2317
2318         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2319         if (err < 0) {
2320                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2321                 WARN_ON(err == -EMSGSIZE);
2322                 kfree_skb(skb);
2323                 goto errout;
2324         }
2325         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2326 errout:
2327         if (err < 0)
2328                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2329 }
2330
2331 /*
2332  *      /proc
2333  */
2334
2335 #ifdef CONFIG_PROC_FS
2336
/*
 * Record length (150) used by rt6_info_route() / rt6_proc_info() to turn
 * a read offset into a count of records to skip.
 * NOTE(review): this matches one formatted line only while the device
 * name fits in 8 characters -- confirm.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State threaded through fib6_clean_all() while formatting the table. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2347
2348 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2349 {
2350         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2351
2352         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2353                 arg->skip++;
2354                 return 0;
2355         }
2356
2357         if (arg->len >= arg->length)
2358                 return 0;
2359
2360         arg->len += sprintf(arg->buffer + arg->len,
2361                             NIP6_SEQFMT " %02x ",
2362                             NIP6(rt->rt6i_dst.addr),
2363                             rt->rt6i_dst.plen);
2364
2365 #ifdef CONFIG_IPV6_SUBTREES
2366         arg->len += sprintf(arg->buffer + arg->len,
2367                             NIP6_SEQFMT " %02x ",
2368                             NIP6(rt->rt6i_src.addr),
2369                             rt->rt6i_src.plen);
2370 #else
2371         arg->len += sprintf(arg->buffer + arg->len,
2372                             "00000000000000000000000000000000 00 ");
2373 #endif
2374
2375         if (rt->rt6i_nexthop) {
2376                 arg->len += sprintf(arg->buffer + arg->len,
2377                                     NIP6_SEQFMT,
2378                                     NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2379         } else {
2380                 arg->len += sprintf(arg->buffer + arg->len,
2381                                     "00000000000000000000000000000000");
2382         }
2383         arg->len += sprintf(arg->buffer + arg->len,
2384                             " %08x %08x %08x %08x %8s\n",
2385                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2386                             rt->u.dst.__use, rt->rt6i_flags,
2387                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2388         return 0;
2389 }
2390
2391 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2392 {
2393         struct rt6_proc_arg arg = {
2394                 .buffer = buffer,
2395                 .offset = offset,
2396                 .length = length,
2397         };
2398
2399         fib6_clean_all(rt6_info_route, 0, &arg);
2400
2401         *start = buffer;
2402         if (offset)
2403                 *start += offset % RT6_INFO_LEN;
2404
2405         arg.len -= offset % RT6_INFO_LEN;
2406
2407         if (arg.len > length)
2408                 arg.len = length;
2409         if (arg.len < 0)
2410                 arg.len = 0;
2411
2412         return arg.len;
2413 }
2414
2415 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2416 {
2417         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2418                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2419                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2420                       rt6_stats.fib_rt_cache,
2421                       atomic_read(&ip6_dst_ops.entries),
2422                       rt6_stats.fib_discarded_routes);
2423
2424         return 0;
2425 }
2426
2427 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2428 {
2429         return single_open(file, rt6_stats_seq_show, NULL);
2430 }
2431
2432 static const struct file_operations rt6_stats_seq_fops = {
2433         .owner   = THIS_MODULE,
2434         .open    = rt6_stats_seq_open,
2435         .read    = seq_read,
2436         .llseek  = seq_lseek,
2437         .release = single_release,
2438 };
2439 #endif  /* CONFIG_PROC_FS */
2440
2441 #ifdef CONFIG_SYSCTL
2442
2443 static int flush_delay;
2444
2445 static
2446 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2447                               void __user *buffer, size_t *lenp, loff_t *ppos)
2448 {
2449         if (write) {
2450                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2451                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2452                 return 0;
2453         } else
2454                 return -EINVAL;
2455 }
2456
2457 ctl_table ipv6_route_table[] = {
2458         {
2459                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH,
2460                 .procname       =       "flush",
2461                 .data           =       &flush_delay,
2462                 .maxlen         =       sizeof(int),
2463                 .mode           =       0200,
2464                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2465         },
2466         {
2467                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2468                 .procname       =       "gc_thresh",
2469                 .data           =       &ip6_dst_ops.gc_thresh,
2470                 .maxlen         =       sizeof(int),
2471                 .mode           =       0644,
2472                 .proc_handler   =       &proc_dointvec,
2473         },
2474         {
2475                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2476                 .procname       =       "max_size",
2477                 .data           =       &ip6_rt_max_size,
2478                 .maxlen         =       sizeof(int),
2479                 .mode           =       0644,
2480                 .proc_handler   =       &proc_dointvec,
2481         },
2482         {
2483                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2484                 .procname       =       "gc_min_interval",
2485                 .data           =       &ip6_rt_gc_min_interval,
2486                 .maxlen         =       sizeof(int),
2487                 .mode           =       0644,
2488                 .proc_handler   =       &proc_dointvec_jiffies,
2489                 .strategy       =       &sysctl_jiffies,
2490         },
2491         {
2492                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2493                 .procname       =       "gc_timeout",
2494                 .data           =       &ip6_rt_gc_timeout,
2495                 .maxlen         =       sizeof(int),
2496                 .mode           =       0644,
2497                 .proc_handler   =       &proc_dointvec_jiffies,
2498                 .strategy       =       &sysctl_jiffies,
2499         },
2500         {
2501                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2502                 .procname       =       "gc_interval",
2503                 .data           =       &ip6_rt_gc_interval,
2504                 .maxlen         =       sizeof(int),
2505                 .mode           =       0644,
2506                 .proc_handler   =       &proc_dointvec_jiffies,
2507                 .strategy       =       &sysctl_jiffies,
2508         },
2509         {
2510                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2511                 .procname       =       "gc_elasticity",
2512                 .data           =       &ip6_rt_gc_elasticity,
2513                 .maxlen         =       sizeof(int),
2514                 .mode           =       0644,
2515                 .proc_handler   =       &proc_dointvec_jiffies,
2516                 .strategy       =       &sysctl_jiffies,
2517         },
2518         {
2519                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2520                 .procname       =       "mtu_expires",
2521                 .data           =       &ip6_rt_mtu_expires,
2522                 .maxlen         =       sizeof(int),
2523                 .mode           =       0644,
2524                 .proc_handler   =       &proc_dointvec_jiffies,
2525                 .strategy       =       &sysctl_jiffies,
2526         },
2527         {
2528                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2529                 .procname       =       "min_adv_mss",
2530                 .data           =       &ip6_rt_min_advmss,
2531                 .maxlen         =       sizeof(int),
2532                 .mode           =       0644,
2533                 .proc_handler   =       &proc_dointvec_jiffies,
2534                 .strategy       =       &sysctl_jiffies,
2535         },
2536         {
2537                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2538                 .procname       =       "gc_min_interval_ms",
2539                 .data           =       &ip6_rt_gc_min_interval,
2540                 .maxlen         =       sizeof(int),
2541                 .mode           =       0644,
2542                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2543                 .strategy       =       &sysctl_ms_jiffies,
2544         },
2545         { .ctl_name = 0 }
2546 };
2547
2548 #endif
2549
2550 void __init ip6_route_init(void)
2551 {
2552 #ifdef  CONFIG_PROC_FS
2553         struct proc_dir_entry *p;
2554 #endif
2555         ip6_dst_ops.kmem_cachep =
2556                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2557                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2558         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2559
2560         fib6_init();
2561 #ifdef  CONFIG_PROC_FS
2562         p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
2563         if (p)
2564                 p->owner = THIS_MODULE;
2565
2566         proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2567 #endif
2568 #ifdef CONFIG_XFRM
2569         xfrm6_init();
2570 #endif
2571 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2572         fib6_rules_init();
2573 #endif
2574
2575         __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2576         __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2577         __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2578 }
2579
2580 void ip6_route_cleanup(void)
2581 {
2582 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2583         fib6_rules_cleanup();
2584 #endif
2585 #ifdef CONFIG_PROC_FS
2586         proc_net_remove(&init_net, "ipv6_route");
2587         proc_net_remove(&init_net, "rt6_stats");
2588 #endif
2589 #ifdef CONFIG_XFRM
2590         xfrm6_fini();
2591 #endif
2592         rt6_ifdown(NULL);
2593         fib6_gc_cleanup();
2594         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2595 }