bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[PATCH] capable/capability.h (net/)
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at that level
 * RDBG() and RT6_TRACE() expand to printk() calls.  At any lower level both
 * expand to nothing, so their arguments are not evaluated.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG(x): x must be a complete, parenthesised printk() argument list. */
#define RDBG(x) printk x
/* RT6_TRACE(fmt, ...): KERN_DEBUG is prepended to the format string. */
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
75
/*
 * Route cache tunables.  Values built from HZ are in jiffies.
 */
/* ip6_dst_gc() always collects, and reports failure, above this many entries. */
static int ip6_rt_max_size = 4096;
/* Minimum spacing between two ip6_dst_gc() runs while under ip6_rt_max_size. */
static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once below gc_thresh. */
static int ip6_rt_gc_timeout = 60*HZ;
/* Not static and not used in the part of the file shown here. */
int ip6_rt_gc_interval = 30*HZ;
/* Shift applied by ip6_dst_gc() to decay its expiry on every call. */
static int ip6_rt_gc_elasticity = 9;
/* NOTE(review): presumably the lifetime of a learned path MTU — confirm at its users. */
static int ip6_rt_mtu_expires = 10*60*HZ;
/*
 * Lower bound applied by ipv6_advmss().
 * NOTE(review): 20 and 40 look like the TCP and IPv6 header sizes — confirm.
 */
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
/*
 * Forward declarations.  ip6_rt_copy() is used by rt6_cow(); the others are
 * referenced by the ip6_dst_ops and ip6_null_entry initializers below,
 * before their definitions appear.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void             ip6_dst_destroy(struct dst_entry *);
static void             ip6_dst_ifdown(struct dst_entry *,
                                       struct net_device *dev, int how);
static int               ip6_dst_gc(void);

static int              ip6_pkt_discard(struct sk_buff *skb);
static int              ip6_pkt_discard_out(struct sk_buff *skb);
static void             ip6_link_failure(struct sk_buff *skb);
static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
/*
 * Destination cache operations for IPv6 routes.  Entries allocated through
 * ip6_dst_alloc() are sizeof(struct rt6_info) bytes and use the callbacks
 * below, all of which are defined in this file.
 */
static struct dst_ops ip6_dst_ops = {
        .family                 =       AF_INET6,
        .protocol               =       __constant_htons(ETH_P_IPV6),
        .gc                     =       ip6_dst_gc,
        /* ip6_dst_gc() resets its expiry once the entry count is below this. */
        .gc_thresh              =       1024,
        .check                  =       ip6_dst_check,
        .destroy                =       ip6_dst_destroy,
        .ifdown                 =       ip6_dst_ifdown,
        .negative_advice        =       ip6_negative_advice,
        .link_failure           =       ip6_link_failure,
        .update_pmtu            =       ip6_rt_update_pmtu,
        .entry_size             =       sizeof(struct rt6_info),
};
110
/*
 * Sentinel "no route" entry.  It is the initial leaf of ip6_routing_table
 * and is what rt6_device_match(), rt6_best_dflt() and rt6_cow() return when
 * nothing usable is found.  It is a reject route on the loopback device:
 * both input and output handlers discard, and dst.error is -ENETUNREACH,
 * so rt6_lookup() turns it into a NULL result.
 */
struct rt6_info ip6_null_entry = {
        .u = {
                .dst = {
                        .__refcnt       = ATOMIC_INIT(1),
                        .__use          = 1,
                        .dev            = &loopback_dev,
                        .obsolete       = -1,
                        .error          = -ENETUNREACH,
                        .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
                        .input          = ip6_pkt_discard,
                        .output         = ip6_pkt_discard_out,
                        .ops            = &ip6_dst_ops,
                        /* The entry is its own path. */
                        .path           = (struct dst_entry*)&ip6_null_entry,
                }
        },
        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
        /* Largest possible u32 metric. */
        .rt6i_metric    = ~(u32) 0,
        .rt6i_ref       = ATOMIC_INIT(1),
};
130
/*
 * Root node of the IPv6 routing tree.  It starts out holding only
 * ip6_null_entry as its leaf.
 */
struct fib6_node ip6_routing_table = {
        .leaf           = &ip6_null_entry,
        .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};
135
/*
 * Protects all the ip6 fib.  Lookups in this file take it with
 * read_lock_bh(); ip6_ins_rt() and ndisc_dst_alloc() take it with
 * write_lock_bh().
 */

DEFINE_RWLOCK(rt6_lock);
139
140
141 /* allocate dst with ip6_dst_ops */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 {
144         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145 }
146
147 static void ip6_dst_destroy(struct dst_entry *dst)
148 {
149         struct rt6_info *rt = (struct rt6_info *)dst;
150         struct inet6_dev *idev = rt->rt6i_idev;
151
152         if (idev != NULL) {
153                 rt->rt6i_idev = NULL;
154                 in6_dev_put(idev);
155         }       
156 }
157
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159                            int how)
160 {
161         struct rt6_info *rt = (struct rt6_info *)dst;
162         struct inet6_dev *idev = rt->rt6i_idev;
163
164         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166                 if (loopback_idev != NULL) {
167                         rt->rt6i_idev = loopback_idev;
168                         in6_dev_put(idev);
169                 }
170         }
171 }
172
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 {
175         return (rt->rt6i_flags & RTF_EXPIRES &&
176                 time_after(jiffies, rt->rt6i_expires));
177 }
178
179 /*
180  *      Route lookup. Any rt6_lock is implied.
181  */
182
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184                                                     int oif,
185                                                     int strict)
186 {
187         struct rt6_info *local = NULL;
188         struct rt6_info *sprt;
189
190         if (oif) {
191                 for (sprt = rt; sprt; sprt = sprt->u.next) {
192                         struct net_device *dev = sprt->rt6i_dev;
193                         if (dev->ifindex == oif)
194                                 return sprt;
195                         if (dev->flags & IFF_LOOPBACK) {
196                                 if (sprt->rt6i_idev == NULL ||
197                                     sprt->rt6i_idev->dev->ifindex != oif) {
198                                         if (strict && oif)
199                                                 continue;
200                                         if (local && (!oif || 
201                                                       local->rt6i_idev->dev->ifindex == oif))
202                                                 continue;
203                                 }
204                                 local = sprt;
205                         }
206                 }
207
208                 if (local)
209                         return local;
210
211                 if (strict)
212                         return &ip6_null_entry;
213         }
214         return rt;
215 }
216
/*
 *      Pointer to the last default router chosen by rt6_best_dflt().
 *      Updates are serialised by rt6_dflt_lock.  rt6_best_dflt() takes the
 *      lock with plain spin_lock() because BH is disabled locally there;
 *      rt6_reset_dflt_pointer() disables BH itself via spin_lock_bh().
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 {
225         spin_lock_bh(&rt6_dflt_lock);
226         if (rt == NULL || rt == rt6_dflt_pointer) {
227                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228                 rt6_dflt_pointer = NULL;
229         }
230         spin_unlock_bh(&rt6_dflt_lock);
231 }
232
/*
 * Default Router Selection (RFC 2461 6.3.6)
 *
 * Walks the route list starting at @rt and scores every candidate:
 *   +8  no outgoing interface was requested (@oif == 0) or the route's
 *       device has ifindex @oif
 *   +4  the route is the previously chosen one (rt6_dflt_pointer)
 *   +3  next-hop neighbour is NUD_REACHABLE
 *   +2  next-hop neighbour is NUD_STALE, NUD_DELAY or NUD_PROBE
 *   +1  next-hop neighbour is NUD_NOARP or NUD_PERMANENT
 * Expired routes, routes without a next-hop neighbour and routes whose
 * neighbour is in any other state (e.g. NUD_INCOMPLETE) are skipped.
 *
 * The best score wins; a score of 12 or more ends the scan at once.
 * If nothing scored and a previous choice is recorded, the list is
 * round-robined starting after that choice.  A route found by either of
 * these steps is recorded in rt6_dflt_pointer.
 *
 * Failing that, the first unexpired RTF_DEFAULT route on the root leaf list
 * that suits @oif is returned without being recorded, and finally
 * &ip6_null_entry.  The function never returns NULL and takes no reference
 * on the route it returns.
 *
 * rt6_dflt_lock is taken with plain spin_lock().
 * NOTE(review): this relies on the caller having BH disabled — confirm for
 * every call site.
 */
static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
{
        struct rt6_info *match = NULL;
        struct rt6_info *sprt;
        int mpri = 0;

        for (sprt = rt; sprt; sprt = sprt->u.next) {
                struct neighbour *neigh;
                int m = 0;

                if (!oif ||
                    (sprt->rt6i_dev &&
                     sprt->rt6i_dev->ifindex == oif))
                        m += 8;

                if (rt6_check_expired(sprt))
                        continue;

                /* Read without rt6_dflt_lock; only used as a scoring hint. */
                if (sprt == rt6_dflt_pointer)
                        m += 4;

                if ((neigh = sprt->rt6i_nexthop) != NULL) {
                        read_lock_bh(&neigh->lock);
                        switch (neigh->nud_state) {
                        case NUD_REACHABLE:
                                m += 3;
                                break;

                        case NUD_STALE:
                        case NUD_DELAY:
                        case NUD_PROBE:
                                m += 2;
                                break;

                        case NUD_NOARP:
                        case NUD_PERMANENT:
                                m += 1;
                                break;

                        case NUD_INCOMPLETE:
                        default:
                                /* Not usable: unlock before skipping. */
                                read_unlock_bh(&neigh->lock);
                                continue;
                        }
                        read_unlock_bh(&neigh->lock);
                } else {
                        continue;
                }

                if (m > mpri || m >= 12) {
                        match = sprt;
                        mpri = m;
                        if (m >= 12) {
                                /* we choose the last default router if it
                                 * is in (probably) reachable state.
                                 * If route changed, we should do pmtu
                                 * discovery. --yoshfuji
                                 */
                                break;
                        }
                }
        }

        spin_lock(&rt6_dflt_lock);
        if (!match) {
                /*
                 *      No default routers are known to be reachable.
                 *      SHOULD round robin
                 */
                if (rt6_dflt_pointer) {
                        /* First the entries after the previous choice ... */
                        for (sprt = rt6_dflt_pointer->u.next;
                             sprt; sprt = sprt->u.next) {
                                if (sprt->u.dst.obsolete <= 0 &&
                                    sprt->u.dst.error == 0 &&
                                    !rt6_check_expired(sprt)) {
                                        match = sprt;
                                        break;
                                }
                        }
                        /*
                         * ... then wrap around: from the head of the list
                         * up to and including the previous choice.
                         */
                        for (sprt = rt;
                             !match && sprt;
                             sprt = sprt->u.next) {
                                if (sprt->u.dst.obsolete <= 0 &&
                                    sprt->u.dst.error == 0 &&
                                    !rt6_check_expired(sprt)) {
                                        match = sprt;
                                        break;
                                }
                                if (sprt == rt6_dflt_pointer)
                                        break;
                        }
                }
        }

        if (match) {
                if (rt6_dflt_pointer != match)
                        RT6_TRACE("changed default router: %p->%p\n",
                                  rt6_dflt_pointer, match);
                rt6_dflt_pointer = match;
        }
        spin_unlock(&rt6_dflt_lock);

        if (!match) {
                /*
                 * Last Resort: if no default routers found, 
                 * use addrconf default route.
                 * We don't record this route.
                 */
                for (sprt = ip6_routing_table.leaf;
                     sprt; sprt = sprt->u.next) {
                        if (!rt6_check_expired(sprt) &&
                            (sprt->rt6i_flags & RTF_DEFAULT) &&
                            (!oif ||
                             (sprt->rt6i_dev &&
                              sprt->rt6i_dev->ifindex == oif))) {
                                match = sprt;
                                break;
                        }
                }
                if (!match) {
                        /* no default route.  give up. */
                        match = &ip6_null_entry;
                }
        }

        return match;
}
361
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363                             int oif, int strict)
364 {
365         struct fib6_node *fn;
366         struct rt6_info *rt;
367
368         read_lock_bh(&rt6_lock);
369         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370         rt = rt6_device_match(fn->leaf, oif, strict);
371         dst_hold(&rt->u.dst);
372         rt->u.dst.__use++;
373         read_unlock_bh(&rt6_lock);
374
375         rt->u.dst.lastuse = jiffies;
376         if (rt->u.dst.error == 0)
377                 return rt;
378         dst_release(&rt->u.dst);
379         return NULL;
380 }
381
382 /* ip6_ins_rt is called with FREE rt6_lock.
383    It takes new route entry, the addition fails by any reason the
384    route is freed. In any case, if caller does not hold it, it may
385    be destroyed.
386  */
387
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389                 void *_rtattr, struct netlink_skb_parms *req)
390 {
391         int err;
392
393         write_lock_bh(&rt6_lock);
394         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395         write_unlock_bh(&rt6_lock);
396
397         return err;
398 }
399
400 /* No rt6_lock! If COW failed, the function returns dead route entry
401    with dst->error set to errno value.
402  */
403
404 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
405                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
406 {
407         int err;
408         struct rt6_info *rt;
409
410         /*
411          *      Clone the route.
412          */
413
414         rt = ip6_rt_copy(ort);
415
416         if (rt) {
417                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
418                         if (rt->rt6i_dst.plen != 128 &&
419                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
420                                 rt->rt6i_flags |= RTF_ANYCAST;
421                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
422                 }
423
424                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
425                 rt->rt6i_dst.plen = 128;
426                 rt->rt6i_flags |= RTF_CACHE;
427                 rt->u.dst.flags |= DST_HOST;
428
429 #ifdef CONFIG_IPV6_SUBTREES
430                 if (rt->rt6i_src.plen && saddr) {
431                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
432                         rt->rt6i_src.plen = 128;
433                 }
434 #endif
435
436                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
437
438                 dst_hold(&rt->u.dst);
439
440                 err = ip6_ins_rt(rt, NULL, NULL, req);
441                 if (err == 0)
442                         return rt;
443
444                 rt->u.dst.error = err;
445
446                 return rt;
447         }
448         dst_hold(&ip6_null_entry.u.dst);
449         return &ip6_null_entry;
450 }
451
/*
 * BACKTRACK() - helper for ip6_route_input() and ip6_route_output().
 *
 * Expects the locals `rt', `fn' and `strict' and the labels `out' and
 * `restart' in the enclosing function.  It acts only when the preceding
 * device match produced &ip6_null_entry under strict matching: it then
 * climbs fn->parent.  At a node flagged RTN_ROOT it takes a reference on
 * rt (still the null entry) and jumps to `out'; at the first node flagged
 * RTN_RTINFO it jumps to `restart' so the match is retried with that node's
 * leaf.  If the parent chain ends first, or the condition does not hold,
 * execution simply continues after the macro.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
       while ((fn = fn->parent) != NULL) { \
                if (fn->fn_flags & RTN_ROOT) { \
                        dst_hold(&rt->u.dst); \
                        goto out; \
                } \
                if (fn->fn_flags & RTN_RTINFO) \
                        goto restart; \
        } \
}
463
464
/*
 * Attach a route to an incoming packet.
 *
 * The packet's destination and source addresses are looked up in
 * ip6_routing_table and the result is matched against the receiving
 * interface (skb->dev->ifindex).  `strict' is non-zero for multicast and
 * link-local destinations; in that case BACKTRACK() may climb the tree when
 * the match yields &ip6_null_entry.
 *
 * A route that has no next hop and is not RTF_NONEXTHOP is cloned into a
 * host route by rt6_cow(), with rt6_lock dropped around the call.  If the
 * clone could not be inserted because an equal entry appeared meanwhile
 * (-EEXIST), the lookup is repeated; at most three clone attempts are made.
 *
 * On return skb->dst points at the chosen route (possibly an error route)
 * with a reference held, and its use counter and last-use stamp updated.
 */
void ip6_route_input(struct sk_buff *skb)
{
        struct fib6_node *fn;
        struct rt6_info *rt;
        int strict;
        int attempts = 3;

        strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
        read_lock_bh(&rt6_lock);

        fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
                         &skb->nh.ipv6h->saddr);

restart:
        rt = fn->leaf;

        /* A cached (already cloned) entry is used as is. */
        if ((rt->rt6i_flags & RTF_CACHE)) {
                rt = rt6_device_match(rt, skb->dev->ifindex, strict);
                BACKTRACK();
                dst_hold(&rt->u.dst);
                goto out;
        }

        rt = rt6_device_match(rt, skb->dev->ifindex, strict);
        BACKTRACK();

        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
                struct rt6_info *nrt;
                /* Keep rt alive while the lock is dropped for the clone. */
                dst_hold(&rt->u.dst);
                read_unlock_bh(&rt6_lock);

                nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
                              &skb->nh.ipv6h->saddr,
                              &NETLINK_CB(skb));

                dst_release(&rt->u.dst);
                rt = nrt;

                /* rt6_cow() returned with a reference held; lock not held. */
                if (rt->u.dst.error != -EEXIST || --attempts <= 0)
                        goto out2;

                /* Race condition! In the gap, when rt6_lock was
                   released someone could insert this route.  Relookup.
                */
                dst_release(&rt->u.dst);
                goto relookup;
        }
        dst_hold(&rt->u.dst);

out:
        read_unlock_bh(&rt6_lock);
out2:
        rt->u.dst.lastuse = jiffies;
        rt->u.dst.__use++;
        skb->dst = (struct dst_entry *) rt;
}
523
/*
 * Find the route for an outgoing flow.
 *
 * fl->fl6_dst and fl->fl6_src are looked up in ip6_routing_table.
 * `strict' is non-zero for multicast and link-local destinations.
 *
 *  - A cached (RTF_CACHE) entry is matched against fl->oif and used as is.
 *  - For an RTF_DEFAULT route with metric >= IP6_RT_PRIO_ADDRCONF the
 *    router is chosen by rt6_best_dflt(); an RTF_DEFAULT route with a
 *    smaller metric is used without any device matching.
 *  - Any other route is matched against fl->oif, with BACKTRACK() climbing
 *    the tree when a strict match fails.
 *
 * A route that has no next hop and is not RTF_NONEXTHOP is cloned into a
 * host route by rt6_cow(), with rt6_lock dropped around the call; on
 * -EEXIST the lookup is repeated, with at most three clone attempts.
 *
 * @sk is not used.  Returns the chosen dst entry (possibly an error route)
 * with a reference held, and its use counter and last-use stamp updated.
 */
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
        struct fib6_node *fn;
        struct rt6_info *rt;
        int strict;
        int attempts = 3;

        strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
        read_lock_bh(&rt6_lock);

        fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
        rt = fn->leaf;

        if ((rt->rt6i_flags & RTF_CACHE)) {
                rt = rt6_device_match(rt, fl->oif, strict);
                BACKTRACK();
                dst_hold(&rt->u.dst);
                goto out;
        }
        if (rt->rt6i_flags & RTF_DEFAULT) {
                if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
                        rt = rt6_best_dflt(rt, fl->oif);
        } else {
                rt = rt6_device_match(rt, fl->oif, strict);
                BACKTRACK();
        }

        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
                struct rt6_info *nrt;
                /* Keep rt alive while the lock is dropped for the clone. */
                dst_hold(&rt->u.dst);
                read_unlock_bh(&rt6_lock);

                nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);

                dst_release(&rt->u.dst);
                rt = nrt;

                /* rt6_cow() returned with a reference held; lock not held. */
                if (rt->u.dst.error != -EEXIST || --attempts <= 0)
                        goto out2;

                /* Race condition! In the gap, when rt6_lock was
                   released someone could insert this route.  Relookup.
                */
                dst_release(&rt->u.dst);
                goto relookup;
        }
        dst_hold(&rt->u.dst);

out:
        read_unlock_bh(&rt6_lock);
out2:
        rt->u.dst.lastuse = jiffies;
        rt->u.dst.__use++;
        return &rt->u.dst;
}
583
584
585 /*
586  *      Destination cache support functions
587  */
588
589 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
590 {
591         struct rt6_info *rt;
592
593         rt = (struct rt6_info *) dst;
594
595         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
596                 return dst;
597
598         return NULL;
599 }
600
601 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
602 {
603         struct rt6_info *rt = (struct rt6_info *) dst;
604
605         if (rt) {
606                 if (rt->rt6i_flags & RTF_CACHE)
607                         ip6_del_rt(rt, NULL, NULL, NULL);
608                 else
609                         dst_release(dst);
610         }
611         return NULL;
612 }
613
614 static void ip6_link_failure(struct sk_buff *skb)
615 {
616         struct rt6_info *rt;
617
618         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
619
620         rt = (struct rt6_info *) skb->dst;
621         if (rt) {
622                 if (rt->rt6i_flags&RTF_CACHE) {
623                         dst_set_expires(&rt->u.dst, 0);
624                         rt->rt6i_flags |= RTF_EXPIRES;
625                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
626                         rt->rt6i_node->fn_sernum = -1;
627         }
628 }
629
630 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
631 {
632         struct rt6_info *rt6 = (struct rt6_info*)dst;
633
634         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
635                 rt6->rt6i_flags |= RTF_MODIFIED;
636                 if (mtu < IPV6_MIN_MTU) {
637                         mtu = IPV6_MIN_MTU;
638                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
639                 }
640                 dst->metrics[RTAX_MTU-1] = mtu;
641         }
642 }
643
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc() and reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration: defined further down, used by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
647
648 static inline unsigned int ipv6_advmss(unsigned int mtu)
649 {
650         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
651
652         if (mtu < ip6_rt_min_advmss)
653                 mtu = ip6_rt_min_advmss;
654
655         /*
656          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
657          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
658          * IPV6_MAXPLEN is also valid and means: "any MSS, 
659          * rely only on pmtu discovery"
660          */
661         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
662                 mtu = IPV6_MAXPLEN;
663         return mtu;
664 }
665
666 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
667                                   struct neighbour *neigh,
668                                   struct in6_addr *addr,
669                                   int (*output)(struct sk_buff *))
670 {
671         struct rt6_info *rt;
672         struct inet6_dev *idev = in6_dev_get(dev);
673
674         if (unlikely(idev == NULL))
675                 return NULL;
676
677         rt = ip6_dst_alloc();
678         if (unlikely(rt == NULL)) {
679                 in6_dev_put(idev);
680                 goto out;
681         }
682
683         dev_hold(dev);
684         if (neigh)
685                 neigh_hold(neigh);
686         else
687                 neigh = ndisc_get_neigh(dev, addr);
688
689         rt->rt6i_dev      = dev;
690         rt->rt6i_idev     = idev;
691         rt->rt6i_nexthop  = neigh;
692         atomic_set(&rt->u.dst.__refcnt, 1);
693         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
694         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
695         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
696         rt->u.dst.output  = output;
697
698 #if 0   /* there's no chance to use these for ndisc */
699         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
700                                 ? DST_HOST 
701                                 : 0;
702         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
703         rt->rt6i_dst.plen = 128;
704 #endif
705
706         write_lock_bh(&rt6_lock);
707         rt->u.dst.next = ndisc_dst_gc_list;
708         ndisc_dst_gc_list = &rt->u.dst;
709         write_unlock_bh(&rt6_lock);
710
711         fib6_force_start_gc();
712
713 out:
714         return (struct dst_entry *)rt;
715 }
716
717 int ndisc_dst_gc(int *more)
718 {
719         struct dst_entry *dst, *next, **pprev;
720         int freed;
721
722         next = NULL;
723         pprev = &ndisc_dst_gc_list;
724         freed = 0;
725         while ((dst = *pprev) != NULL) {
726                 if (!atomic_read(&dst->__refcnt)) {
727                         *pprev = dst->next;
728                         dst_free(dst);
729                         freed++;
730                 } else {
731                         pprev = &dst->next;
732                         (*more)++;
733                 }
734         }
735
736         return freed;
737 }
738
739 static int ip6_dst_gc(void)
740 {
741         static unsigned expire = 30*HZ;
742         static unsigned long last_gc;
743         unsigned long now = jiffies;
744
745         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
746             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
747                 goto out;
748
749         expire++;
750         fib6_run_gc(expire);
751         last_gc = now;
752         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
753                 expire = ip6_rt_gc_timeout>>1;
754
755 out:
756         expire -= expire>>ip6_rt_gc_elasticity;
757         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
758 }
759
760 /* Clean host part of a prefix. Not necessary in radix tree,
761    but results in cleaner routing tables.
762
763    Remove it only when all the things will work!
764  */
765
766 static int ipv6_get_mtu(struct net_device *dev)
767 {
768         int mtu = IPV6_MIN_MTU;
769         struct inet6_dev *idev;
770
771         idev = in6_dev_get(dev);
772         if (idev) {
773                 mtu = idev->cnf.mtu6;
774                 in6_dev_put(idev);
775         }
776         return mtu;
777 }
778
779 int ipv6_get_hoplimit(struct net_device *dev)
780 {
781         int hoplimit = ipv6_devconf.hop_limit;
782         struct inet6_dev *idev;
783
784         idev = in6_dev_get(dev);
785         if (idev) {
786                 hoplimit = idev->cnf.hop_limit;
787                 in6_dev_put(idev);
788         }
789         return hoplimit;
790 }
791
792 /*
793  *
794  */
795
796 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
797                 void *_rtattr, struct netlink_skb_parms *req)
798 {
799         int err;
800         struct rtmsg *r;
801         struct rtattr **rta;
802         struct rt6_info *rt = NULL;
803         struct net_device *dev = NULL;
804         struct inet6_dev *idev = NULL;
805         int addr_type;
806
807         rta = (struct rtattr **) _rtattr;
808
809         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
810                 return -EINVAL;
811 #ifndef CONFIG_IPV6_SUBTREES
812         if (rtmsg->rtmsg_src_len)
813                 return -EINVAL;
814 #endif
815         if (rtmsg->rtmsg_ifindex) {
816                 err = -ENODEV;
817                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
818                 if (!dev)
819                         goto out;
820                 idev = in6_dev_get(dev);
821                 if (!idev)
822                         goto out;
823         }
824
825         if (rtmsg->rtmsg_metric == 0)
826                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
827
828         rt = ip6_dst_alloc();
829
830         if (rt == NULL) {
831                 err = -ENOMEM;
832                 goto out;
833         }
834
835         rt->u.dst.obsolete = -1;
836         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
837         if (nlh && (r = NLMSG_DATA(nlh))) {
838                 rt->rt6i_protocol = r->rtm_protocol;
839         } else {
840                 rt->rt6i_protocol = RTPROT_BOOT;
841         }
842
843         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
844
845         if (addr_type & IPV6_ADDR_MULTICAST)
846                 rt->u.dst.input = ip6_mc_input;
847         else
848                 rt->u.dst.input = ip6_forward;
849
850         rt->u.dst.output = ip6_output;
851
852         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
853                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
854         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
855         if (rt->rt6i_dst.plen == 128)
856                rt->u.dst.flags = DST_HOST;
857
858 #ifdef CONFIG_IPV6_SUBTREES
859         ipv6_addr_prefix(&rt->rt6i_src.addr, 
860                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
861         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
862 #endif
863
864         rt->rt6i_metric = rtmsg->rtmsg_metric;
865
866         /* We cannot add true routes via loopback here,
867            they would result in kernel looping; promote them to reject routes
868          */
869         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
870             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
871                 /* hold loopback dev/idev if we haven't done so. */
872                 if (dev != &loopback_dev) {
873                         if (dev) {
874                                 dev_put(dev);
875                                 in6_dev_put(idev);
876                         }
877                         dev = &loopback_dev;
878                         dev_hold(dev);
879                         idev = in6_dev_get(dev);
880                         if (!idev) {
881                                 err = -ENODEV;
882                                 goto out;
883                         }
884                 }
885                 rt->u.dst.output = ip6_pkt_discard_out;
886                 rt->u.dst.input = ip6_pkt_discard;
887                 rt->u.dst.error = -ENETUNREACH;
888                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
889                 goto install_route;
890         }
891
892         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
893                 struct in6_addr *gw_addr;
894                 int gwa_type;
895
896                 gw_addr = &rtmsg->rtmsg_gateway;
897                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
898                 gwa_type = ipv6_addr_type(gw_addr);
899
900                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
901                         struct rt6_info *grt;
902
903                         /* IPv6 strictly inhibits using not link-local
904                            addresses as nexthop address.
905                            Otherwise, router will not able to send redirects.
906                            It is very good, but in some (rare!) circumstances
907                            (SIT, PtP, NBMA NOARP links) it is handy to allow
908                            some exceptions. --ANK
909                          */
910                         err = -EINVAL;
911                         if (!(gwa_type&IPV6_ADDR_UNICAST))
912                                 goto out;
913
914                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
915
916                         err = -EHOSTUNREACH;
917                         if (grt == NULL)
918                                 goto out;
919                         if (dev) {
920                                 if (dev != grt->rt6i_dev) {
921                                         dst_release(&grt->u.dst);
922                                         goto out;
923                                 }
924                         } else {
925                                 dev = grt->rt6i_dev;
926                                 idev = grt->rt6i_idev;
927                                 dev_hold(dev);
928                                 in6_dev_hold(grt->rt6i_idev);
929                         }
930                         if (!(grt->rt6i_flags&RTF_GATEWAY))
931                                 err = 0;
932                         dst_release(&grt->u.dst);
933
934                         if (err)
935                                 goto out;
936                 }
937                 err = -EINVAL;
938                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
939                         goto out;
940         }
941
942         err = -ENODEV;
943         if (dev == NULL)
944                 goto out;
945
946         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
947                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
948                 if (IS_ERR(rt->rt6i_nexthop)) {
949                         err = PTR_ERR(rt->rt6i_nexthop);
950                         rt->rt6i_nexthop = NULL;
951                         goto out;
952                 }
953         }
954
955         rt->rt6i_flags = rtmsg->rtmsg_flags;
956
957 install_route:
958         if (rta && rta[RTA_METRICS-1]) {
959                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
960                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
961
962                 while (RTA_OK(attr, attrlen)) {
963                         unsigned flavor = attr->rta_type;
964                         if (flavor) {
965                                 if (flavor > RTAX_MAX) {
966                                         err = -EINVAL;
967                                         goto out;
968                                 }
969                                 rt->u.dst.metrics[flavor-1] =
970                                         *(u32 *)RTA_DATA(attr);
971                         }
972                         attr = RTA_NEXT(attr, attrlen);
973                 }
974         }
975
976         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
977                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
978         if (!rt->u.dst.metrics[RTAX_MTU-1])
979                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
980         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
981                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
982         rt->u.dst.dev = dev;
983         rt->rt6i_idev = idev;
984         return ip6_ins_rt(rt, nlh, _rtattr, req);
985
986 out:
987         if (dev)
988                 dev_put(dev);
989         if (idev)
990                 in6_dev_put(idev);
991         if (rt)
992                 dst_free((struct dst_entry *) rt);
993         return err;
994 }
995
996 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
997 {
998         int err;
999
1000         write_lock_bh(&rt6_lock);
1001
1002         rt6_reset_dflt_pointer(NULL);
1003
1004         err = fib6_del(rt, nlh, _rtattr, req);
1005         dst_release(&rt->u.dst);
1006
1007         write_unlock_bh(&rt6_lock);
1008
1009         return err;
1010 }
1011
1012 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1013 {
1014         struct fib6_node *fn;
1015         struct rt6_info *rt;
1016         int err = -ESRCH;
1017
1018         read_lock_bh(&rt6_lock);
1019
1020         fn = fib6_locate(&ip6_routing_table,
1021                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1022                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1023         
1024         if (fn) {
1025                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1026                         if (rtmsg->rtmsg_ifindex &&
1027                             (rt->rt6i_dev == NULL ||
1028                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1029                                 continue;
1030                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1031                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1032                                 continue;
1033                         if (rtmsg->rtmsg_metric &&
1034                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1035                                 continue;
1036                         dst_hold(&rt->u.dst);
1037                         read_unlock_bh(&rt6_lock);
1038
1039                         return ip6_del_rt(rt, nlh, _rtattr, req);
1040                 }
1041         }
1042         read_unlock_bh(&rt6_lock);
1043
1044         return err;
1045 }
1046
1047 /*
1048  *      Handle redirects
1049  */
1050 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1051                   struct neighbour *neigh, u8 *lladdr, int on_link)
1052 {
1053         struct rt6_info *rt, *nrt;
1054
1055         /* Locate old route to this destination. */
1056         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1057
1058         if (rt == NULL)
1059                 return;
1060
1061         if (neigh->dev != rt->rt6i_dev)
1062                 goto out;
1063
1064         /*
1065          * Current route is on-link; redirect is always invalid.
1066          * 
1067          * Seems, previous statement is not true. It could
1068          * be node, which looks for us as on-link (f.e. proxy ndisc)
1069          * But then router serving it might decide, that we should
1070          * know truth 8)8) --ANK (980726).
1071          */
1072         if (!(rt->rt6i_flags&RTF_GATEWAY))
1073                 goto out;
1074
1075         /*
1076          *      RFC 2461 specifies that redirects should only be
1077          *      accepted if they come from the nexthop to the target.
1078          *      Due to the way default routers are chosen, this notion
1079          *      is a bit fuzzy and one might need to check all default
1080          *      routers.
1081          */
1082         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1083                 if (rt->rt6i_flags & RTF_DEFAULT) {
1084                         struct rt6_info *rt1;
1085
1086                         read_lock(&rt6_lock);
1087                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1088                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1089                                         dst_hold(&rt1->u.dst);
1090                                         dst_release(&rt->u.dst);
1091                                         read_unlock(&rt6_lock);
1092                                         rt = rt1;
1093                                         goto source_ok;
1094                                 }
1095                         }
1096                         read_unlock(&rt6_lock);
1097                 }
1098                 if (net_ratelimit())
1099                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1100                                "for redirect target\n");
1101                 goto out;
1102         }
1103
1104 source_ok:
1105
1106         /*
1107          *      We have finally decided to accept it.
1108          */
1109
1110         neigh_update(neigh, lladdr, NUD_STALE, 
1111                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1112                      NEIGH_UPDATE_F_OVERRIDE|
1113                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1114                                      NEIGH_UPDATE_F_ISROUTER))
1115                      );
1116
1117         /*
1118          * Redirect received -> path was valid.
1119          * Look, redirects are sent only in response to data packets,
1120          * so that this nexthop apparently is reachable. --ANK
1121          */
1122         dst_confirm(&rt->u.dst);
1123
1124         /* Duplicate redirect: silently ignore. */
1125         if (neigh == rt->u.dst.neighbour)
1126                 goto out;
1127
1128         nrt = ip6_rt_copy(rt);
1129         if (nrt == NULL)
1130                 goto out;
1131
1132         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1133         if (on_link)
1134                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1135
1136         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1137         nrt->rt6i_dst.plen = 128;
1138         nrt->u.dst.flags |= DST_HOST;
1139
1140         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1141         nrt->rt6i_nexthop = neigh_clone(neigh);
1142         /* Reset pmtu, it may be better */
1143         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1144         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1145
1146         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1147                 goto out;
1148
1149         if (rt->rt6i_flags&RTF_CACHE) {
1150                 ip6_del_rt(rt, NULL, NULL, NULL);
1151                 return;
1152         }
1153
1154 out:
1155         dst_release(&rt->u.dst);
1156         return;
1157 }
1158
1159 /*
1160  *      Handle ICMP "packet too big" messages
1161  *      i.e. Path MTU discovery
1162  */
1163
1164 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1165                         struct net_device *dev, u32 pmtu)
1166 {
1167         struct rt6_info *rt, *nrt;
1168         int allfrag = 0;
1169
1170         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1171         if (rt == NULL)
1172                 return;
1173
1174         if (pmtu >= dst_mtu(&rt->u.dst))
1175                 goto out;
1176
1177         if (pmtu < IPV6_MIN_MTU) {
1178                 /*
1179                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1180                  * MTU (1280) and a fragment header should always be included
1181                  * after a node receiving Too Big message reporting PMTU is
1182                  * less than the IPv6 Minimum Link MTU.
1183                  */
1184                 pmtu = IPV6_MIN_MTU;
1185                 allfrag = 1;
1186         }
1187
1188         /* New mtu received -> path was valid.
1189            They are sent only in response to data packets,
1190            so that this nexthop apparently is reachable. --ANK
1191          */
1192         dst_confirm(&rt->u.dst);
1193
1194         /* Host route. If it is static, it would be better
1195            not to override it, but add new one, so that
1196            when cache entry will expire old pmtu
1197            would return automatically.
1198          */
1199         if (rt->rt6i_flags & RTF_CACHE) {
1200                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1201                 if (allfrag)
1202                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1203                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1204                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1205                 goto out;
1206         }
1207
1208         /* Network route.
1209            Two cases are possible:
1210            1. It is connected route. Action: COW
1211            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1212          */
1213         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1214                 nrt = rt6_cow(rt, daddr, saddr, NULL);
1215                 if (!nrt->u.dst.error) {
1216                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1217                         if (allfrag)
1218                                 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1219                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1220                            happened within 5 mins, the recommended timer is 10 mins.
1221                            Here this route expiration time is set to ip6_rt_mtu_expires
1222                            which is 10 mins. After 10 mins the decreased pmtu is expired
1223                            and detecting PMTU increase will be automatically happened.
1224                          */
1225                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1226                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1227                 }
1228                 dst_release(&nrt->u.dst);
1229         } else {
1230                 nrt = ip6_rt_copy(rt);
1231                 if (nrt == NULL)
1232                         goto out;
1233                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1234                 nrt->rt6i_dst.plen = 128;
1235                 nrt->u.dst.flags |= DST_HOST;
1236                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1237                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1238                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1239                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1240                 if (allfrag)
1241                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1242                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1243         }
1244
1245 out:
1246         dst_release(&rt->u.dst);
1247 }
1248
1249 /*
1250  *      Misc support functions
1251  */
1252
1253 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1254 {
1255         struct rt6_info *rt = ip6_dst_alloc();
1256
1257         if (rt) {
1258                 rt->u.dst.input = ort->u.dst.input;
1259                 rt->u.dst.output = ort->u.dst.output;
1260
1261                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1262                 rt->u.dst.dev = ort->u.dst.dev;
1263                 if (rt->u.dst.dev)
1264                         dev_hold(rt->u.dst.dev);
1265                 rt->rt6i_idev = ort->rt6i_idev;
1266                 if (rt->rt6i_idev)
1267                         in6_dev_hold(rt->rt6i_idev);
1268                 rt->u.dst.lastuse = jiffies;
1269                 rt->rt6i_expires = 0;
1270
1271                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1272                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1273                 rt->rt6i_metric = 0;
1274
1275                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1276 #ifdef CONFIG_IPV6_SUBTREES
1277                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1278 #endif
1279         }
1280         return rt;
1281 }
1282
1283 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1284 {       
1285         struct rt6_info *rt;
1286         struct fib6_node *fn;
1287
1288         fn = &ip6_routing_table;
1289
1290         write_lock_bh(&rt6_lock);
1291         for (rt = fn->leaf; rt; rt=rt->u.next) {
1292                 if (dev == rt->rt6i_dev &&
1293                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1294                         break;
1295         }
1296         if (rt)
1297                 dst_hold(&rt->u.dst);
1298         write_unlock_bh(&rt6_lock);
1299         return rt;
1300 }
1301
1302 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1303                                      struct net_device *dev)
1304 {
1305         struct in6_rtmsg rtmsg;
1306
1307         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1308         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1309         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1310         rtmsg.rtmsg_metric = 1024;
1311         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1312
1313         rtmsg.rtmsg_ifindex = dev->ifindex;
1314
1315         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1316         return rt6_get_dflt_router(gwaddr, dev);
1317 }
1318
1319 void rt6_purge_dflt_routers(void)
1320 {
1321         struct rt6_info *rt;
1322
1323 restart:
1324         read_lock_bh(&rt6_lock);
1325         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1326                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1327                         dst_hold(&rt->u.dst);
1328
1329                         rt6_reset_dflt_pointer(NULL);
1330
1331                         read_unlock_bh(&rt6_lock);
1332
1333                         ip6_del_rt(rt, NULL, NULL, NULL);
1334
1335                         goto restart;
1336                 }
1337         }
1338         read_unlock_bh(&rt6_lock);
1339 }
1340
1341 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1342 {
1343         struct in6_rtmsg rtmsg;
1344         int err;
1345
1346         switch(cmd) {
1347         case SIOCADDRT:         /* Add a route */
1348         case SIOCDELRT:         /* Delete a route */
1349                 if (!capable(CAP_NET_ADMIN))
1350                         return -EPERM;
1351                 err = copy_from_user(&rtmsg, arg,
1352                                      sizeof(struct in6_rtmsg));
1353                 if (err)
1354                         return -EFAULT;
1355                         
1356                 rtnl_lock();
1357                 switch (cmd) {
1358                 case SIOCADDRT:
1359                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1360                         break;
1361                 case SIOCDELRT:
1362                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1363                         break;
1364                 default:
1365                         err = -EINVAL;
1366                 }
1367                 rtnl_unlock();
1368
1369                 return err;
1370         };
1371
1372         return -EINVAL;
1373 }
1374
1375 /*
1376  *      Drop the packet on the floor
1377  */
1378
1379 static int ip6_pkt_discard(struct sk_buff *skb)
1380 {
1381         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1382         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1383         kfree_skb(skb);
1384         return 0;
1385 }
1386
1387 static int ip6_pkt_discard_out(struct sk_buff *skb)
1388 {
1389         skb->dev = skb->dst->dev;
1390         return ip6_pkt_discard(skb);
1391 }
1392
1393 /*
1394  *      Allocate a dst for local (unicast / anycast) address.
1395  */
1396
1397 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1398                                     const struct in6_addr *addr,
1399                                     int anycast)
1400 {
1401         struct rt6_info *rt = ip6_dst_alloc();
1402
1403         if (rt == NULL)
1404                 return ERR_PTR(-ENOMEM);
1405
1406         dev_hold(&loopback_dev);
1407         in6_dev_hold(idev);
1408
1409         rt->u.dst.flags = DST_HOST;
1410         rt->u.dst.input = ip6_input;
1411         rt->u.dst.output = ip6_output;
1412         rt->rt6i_dev = &loopback_dev;
1413         rt->rt6i_idev = idev;
1414         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1415         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1416         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1417         rt->u.dst.obsolete = -1;
1418
1419         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1420         if (anycast)
1421                 rt->rt6i_flags |= RTF_ANYCAST;
1422         else
1423                 rt->rt6i_flags |= RTF_LOCAL;
1424         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1425         if (rt->rt6i_nexthop == NULL) {
1426                 dst_free((struct dst_entry *) rt);
1427                 return ERR_PTR(-ENOMEM);
1428         }
1429
1430         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1431         rt->rt6i_dst.plen = 128;
1432
1433         atomic_set(&rt->u.dst.__refcnt, 1);
1434
1435         return rt;
1436 }
1437
1438 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1439 {
1440         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1441             rt != &ip6_null_entry) {
1442                 RT6_TRACE("deleted by ifdown %p\n", rt);
1443                 return -1;
1444         }
1445         return 0;
1446 }
1447
1448 void rt6_ifdown(struct net_device *dev)
1449 {
1450         write_lock_bh(&rt6_lock);
1451         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1452         write_unlock_bh(&rt6_lock);
1453 }
1454
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
1460
1461 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1462 {
1463         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1464         struct inet6_dev *idev;
1465
1466         /* In IPv6 pmtu discovery is not optional,
1467            so that RTAX_MTU lock cannot disable it.
1468            We still use this lock to block changes
1469            caused by addrconf/ndisc.
1470         */
1471
1472         idev = __in6_dev_get(arg->dev);
1473         if (idev == NULL)
1474                 return 0;
1475
1476         /* For administrative MTU increase, there is no way to discover
1477            IPv6 PMTU increase, so PMTU increase should be updated here.
1478            Since RFC 1981 doesn't include administrative MTU increase
1479            update PMTU increase is a MUST. (i.e. jumbo frame)
1480          */
1481         /*
1482            If new MTU is less than route PMTU, this new MTU will be the
1483            lowest MTU in the path, update the route PMTU to reflect PMTU
1484            decreases; if new MTU is greater than route PMTU, and the
1485            old MTU is the lowest MTU in the path, update the route PMTU
1486            to reflect the increase. In this case if the other nodes' MTU
1487            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1488            PMTU discouvery.
1489          */
1490         if (rt->rt6i_dev == arg->dev &&
1491             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1492             (dst_mtu(&rt->u.dst) > arg->mtu ||
1493              (dst_mtu(&rt->u.dst) < arg->mtu &&
1494               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1495                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1496         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1497         return 0;
1498 }
1499
1500 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1501 {
1502         struct rt6_mtu_change_arg arg;
1503
1504         arg.dev = dev;
1505         arg.mtu = mtu;
1506         read_lock_bh(&rt6_lock);
1507         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1508         read_unlock_bh(&rt6_lock);
1509 }
1510
1511 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1512                               struct in6_rtmsg *rtmsg)
1513 {
1514         memset(rtmsg, 0, sizeof(*rtmsg));
1515
1516         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1517         rtmsg->rtmsg_src_len = r->rtm_src_len;
1518         rtmsg->rtmsg_flags = RTF_UP;
1519         if (r->rtm_type == RTN_UNREACHABLE)
1520                 rtmsg->rtmsg_flags |= RTF_REJECT;
1521
1522         if (rta[RTA_GATEWAY-1]) {
1523                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1524                         return -EINVAL;
1525                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1526                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1527         }
1528         if (rta[RTA_DST-1]) {
1529                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1530                         return -EINVAL;
1531                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1532         }
1533         if (rta[RTA_SRC-1]) {
1534                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1535                         return -EINVAL;
1536                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1537         }
1538         if (rta[RTA_OIF-1]) {
1539                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1540                         return -EINVAL;
1541                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1542         }
1543         if (rta[RTA_PRIORITY-1]) {
1544                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1545                         return -EINVAL;
1546                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1547         }
1548         return 0;
1549 }
1550
1551 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1552 {
1553         struct rtmsg *r = NLMSG_DATA(nlh);
1554         struct in6_rtmsg rtmsg;
1555
1556         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1557                 return -EINVAL;
1558         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1559 }
1560
1561 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1562 {
1563         struct rtmsg *r = NLMSG_DATA(nlh);
1564         struct in6_rtmsg rtmsg;
1565
1566         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1567                 return -EINVAL;
1568         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1569 }
1570
/* Per-call state passed through the FIB walker to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
        struct sk_buff *skb;            /* buffer the dump is written to */
        struct netlink_callback *cb;    /* netlink dump callback state */
};
1576
1577 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1578                          struct in6_addr *dst, struct in6_addr *src,
1579                          int iif, int type, u32 pid, u32 seq,
1580                          int prefix, unsigned int flags)
1581 {
1582         struct rtmsg *rtm;
1583         struct nlmsghdr  *nlh;
1584         unsigned char    *b = skb->tail;
1585         struct rta_cacheinfo ci;
1586
1587         if (prefix) {   /* user wants prefix routes only */
1588                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1589                         /* success since this is not a prefix route */
1590                         return 1;
1591                 }
1592         }
1593
1594         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1595         rtm = NLMSG_DATA(nlh);
1596         rtm->rtm_family = AF_INET6;
1597         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1598         rtm->rtm_src_len = rt->rt6i_src.plen;
1599         rtm->rtm_tos = 0;
1600         rtm->rtm_table = RT_TABLE_MAIN;
1601         if (rt->rt6i_flags&RTF_REJECT)
1602                 rtm->rtm_type = RTN_UNREACHABLE;
1603         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1604                 rtm->rtm_type = RTN_LOCAL;
1605         else
1606                 rtm->rtm_type = RTN_UNICAST;
1607         rtm->rtm_flags = 0;
1608         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1609         rtm->rtm_protocol = rt->rt6i_protocol;
1610         if (rt->rt6i_flags&RTF_DYNAMIC)
1611                 rtm->rtm_protocol = RTPROT_REDIRECT;
1612         else if (rt->rt6i_flags & RTF_ADDRCONF)
1613                 rtm->rtm_protocol = RTPROT_KERNEL;
1614         else if (rt->rt6i_flags&RTF_DEFAULT)
1615                 rtm->rtm_protocol = RTPROT_RA;
1616
1617         if (rt->rt6i_flags&RTF_CACHE)
1618                 rtm->rtm_flags |= RTM_F_CLONED;
1619
1620         if (dst) {
1621                 RTA_PUT(skb, RTA_DST, 16, dst);
1622                 rtm->rtm_dst_len = 128;
1623         } else if (rtm->rtm_dst_len)
1624                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1625 #ifdef CONFIG_IPV6_SUBTREES
1626         if (src) {
1627                 RTA_PUT(skb, RTA_SRC, 16, src);
1628                 rtm->rtm_src_len = 128;
1629         } else if (rtm->rtm_src_len)
1630                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1631 #endif
1632         if (iif)
1633                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1634         else if (dst) {
1635                 struct in6_addr saddr_buf;
1636                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1637                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1638         }
1639         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1640                 goto rtattr_failure;
1641         if (rt->u.dst.neighbour)
1642                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1643         if (rt->u.dst.dev)
1644                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1645         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1646         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1647         if (rt->rt6i_expires)
1648                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1649         else
1650                 ci.rta_expires = 0;
1651         ci.rta_used = rt->u.dst.__use;
1652         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1653         ci.rta_error = rt->u.dst.error;
1654         ci.rta_id = 0;
1655         ci.rta_ts = 0;
1656         ci.rta_tsage = 0;
1657         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1658         nlh->nlmsg_len = skb->tail - b;
1659         return skb->len;
1660
1661 nlmsg_failure:
1662 rtattr_failure:
1663         skb_trim(skb, b - skb->data);
1664         return -1;
1665 }
1666
1667 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1668 {
1669         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1670         int prefix;
1671
1672         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1673                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1674                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1675         } else
1676                 prefix = 0;
1677
1678         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1679                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1680                      prefix, NLM_F_MULTI);
1681 }
1682
1683 static int fib6_dump_node(struct fib6_walker_t *w)
1684 {
1685         int res;
1686         struct rt6_info *rt;
1687
1688         for (rt = w->leaf; rt; rt = rt->u.next) {
1689                 res = rt6_dump_route(rt, w->args);
1690                 if (res < 0) {
1691                         /* Frame is full, suspend walking */
1692                         w->leaf = rt;
1693                         return 1;
1694                 }
1695                 BUG_TRAP(res!=0);
1696         }
1697         w->leaf = NULL;
1698         return 0;
1699 }
1700
1701 static void fib6_dump_end(struct netlink_callback *cb)
1702 {
1703         struct fib6_walker_t *w = (void*)cb->args[0];
1704
1705         if (w) {
1706                 cb->args[0] = 0;
1707                 fib6_walker_unlink(w);
1708                 kfree(w);
1709         }
1710         cb->done = (void*)cb->args[1];
1711         cb->args[1] = 0;
1712 }
1713
1714 static int fib6_dump_done(struct netlink_callback *cb)
1715 {
1716         fib6_dump_end(cb);
1717         return cb->done ? cb->done(cb) : 0;
1718 }
1719
1720 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1721 {
1722         struct rt6_rtnl_dump_arg arg;
1723         struct fib6_walker_t *w;
1724         int res;
1725
1726         arg.skb = skb;
1727         arg.cb = cb;
1728
1729         w = (void*)cb->args[0];
1730         if (w == NULL) {
1731                 /* New dump:
1732                  * 
1733                  * 1. hook callback destructor.
1734                  */
1735                 cb->args[1] = (long)cb->done;
1736                 cb->done = fib6_dump_done;
1737
1738                 /*
1739                  * 2. allocate and initialize walker.
1740                  */
1741                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1742                 if (w == NULL)
1743                         return -ENOMEM;
1744                 RT6_TRACE("dump<%p", w);
1745                 memset(w, 0, sizeof(*w));
1746                 w->root = &ip6_routing_table;
1747                 w->func = fib6_dump_node;
1748                 w->args = &arg;
1749                 cb->args[0] = (long)w;
1750                 read_lock_bh(&rt6_lock);
1751                 res = fib6_walk(w);
1752                 read_unlock_bh(&rt6_lock);
1753         } else {
1754                 w->args = &arg;
1755                 read_lock_bh(&rt6_lock);
1756                 res = fib6_walk_continue(w);
1757                 read_unlock_bh(&rt6_lock);
1758         }
1759 #if RT6_DEBUG >= 3
1760         if (res <= 0 && skb->len == 0)
1761                 RT6_TRACE("%p>dump end\n", w);
1762 #endif
1763         res = res < 0 ? res : skb->len;
1764         /* res < 0 is an error. (really, impossible)
1765            res == 0 means that dump is complete, but skb still can contain data.
1766            res > 0 dump is not complete, but frame is full.
1767          */
1768         /* Destroy walker, if dump of this table is complete. */
1769         if (res <= 0)
1770                 fib6_dump_end(cb);
1771         return res;
1772 }
1773
1774 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1775 {
1776         struct rtattr **rta = arg;
1777         int iif = 0;
1778         int err = -ENOBUFS;
1779         struct sk_buff *skb;
1780         struct flowi fl;
1781         struct rt6_info *rt;
1782
1783         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1784         if (skb == NULL)
1785                 goto out;
1786
1787         /* Reserve room for dummy headers, this skb can pass
1788            through good chunk of routing engine.
1789          */
1790         skb->mac.raw = skb->data;
1791         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1792
1793         memset(&fl, 0, sizeof(fl));
1794         if (rta[RTA_SRC-1])
1795                 ipv6_addr_copy(&fl.fl6_src,
1796                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1797         if (rta[RTA_DST-1])
1798                 ipv6_addr_copy(&fl.fl6_dst,
1799                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1800
1801         if (rta[RTA_IIF-1])
1802                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1803
1804         if (iif) {
1805                 struct net_device *dev;
1806                 dev = __dev_get_by_index(iif);
1807                 if (!dev) {
1808                         err = -ENODEV;
1809                         goto out_free;
1810                 }
1811         }
1812
1813         fl.oif = 0;
1814         if (rta[RTA_OIF-1])
1815                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1816
1817         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1818
1819         skb->dst = &rt->u.dst;
1820
1821         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1822         err = rt6_fill_node(skb, rt, 
1823                             &fl.fl6_dst, &fl.fl6_src,
1824                             iif,
1825                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1826                             nlh->nlmsg_seq, 0, 0);
1827         if (err < 0) {
1828                 err = -EMSGSIZE;
1829                 goto out_free;
1830         }
1831
1832         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1833         if (err > 0)
1834                 err = 0;
1835 out:
1836         return err;
1837 out_free:
1838         kfree_skb(skb);
1839         goto out;       
1840 }
1841
1842 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1843                         struct netlink_skb_parms *req)
1844 {
1845         struct sk_buff *skb;
1846         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1847         u32 pid = current->pid;
1848         u32 seq = 0;
1849
1850         if (req)
1851                 pid = req->pid;
1852         if (nlh)
1853                 seq = nlh->nlmsg_seq;
1854         
1855         skb = alloc_skb(size, gfp_any());
1856         if (!skb) {
1857                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1858                 return;
1859         }
1860         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1861                 kfree_skb(skb);
1862                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1863                 return;
1864         }
1865         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1866         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1867 }
1868
1869 /*
1870  *      /proc
1871  */
1872
1873 #ifdef CONFIG_PROC_FS
1874
/*
 * Size in bytes that the /proc output code assumes for one formatted
 * route line; read offsets are converted to whole entries with it.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer being filled */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* routes skipped so far to reach 'offset' */
	int len;	/* bytes written to 'buffer' so far */
};
1885
1886 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1887 {
1888         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1889         int i;
1890
1891         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1892                 arg->skip++;
1893                 return 0;
1894         }
1895
1896         if (arg->len >= arg->length)
1897                 return 0;
1898
1899         for (i=0; i<16; i++) {
1900                 sprintf(arg->buffer + arg->len, "%02x",
1901                         rt->rt6i_dst.addr.s6_addr[i]);
1902                 arg->len += 2;
1903         }
1904         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1905                             rt->rt6i_dst.plen);
1906
1907 #ifdef CONFIG_IPV6_SUBTREES
1908         for (i=0; i<16; i++) {
1909                 sprintf(arg->buffer + arg->len, "%02x",
1910                         rt->rt6i_src.addr.s6_addr[i]);
1911                 arg->len += 2;
1912         }
1913         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1914                             rt->rt6i_src.plen);
1915 #else
1916         sprintf(arg->buffer + arg->len,
1917                 "00000000000000000000000000000000 00 ");
1918         arg->len += 36;
1919 #endif
1920
1921         if (rt->rt6i_nexthop) {
1922                 for (i=0; i<16; i++) {
1923                         sprintf(arg->buffer + arg->len, "%02x",
1924                                 rt->rt6i_nexthop->primary_key[i]);
1925                         arg->len += 2;
1926                 }
1927         } else {
1928                 sprintf(arg->buffer + arg->len,
1929                         "00000000000000000000000000000000");
1930                 arg->len += 32;
1931         }
1932         arg->len += sprintf(arg->buffer + arg->len,
1933                             " %08x %08x %08x %08x %8s\n",
1934                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1935                             rt->u.dst.__use, rt->rt6i_flags, 
1936                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1937         return 0;
1938 }
1939
1940 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1941 {
1942         struct rt6_proc_arg arg;
1943         arg.buffer = buffer;
1944         arg.offset = offset;
1945         arg.length = length;
1946         arg.skip = 0;
1947         arg.len = 0;
1948
1949         read_lock_bh(&rt6_lock);
1950         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1951         read_unlock_bh(&rt6_lock);
1952
1953         *start = buffer;
1954         if (offset)
1955                 *start += offset % RT6_INFO_LEN;
1956
1957         arg.len -= offset % RT6_INFO_LEN;
1958
1959         if (arg.len > length)
1960                 arg.len = length;
1961         if (arg.len < 0)
1962                 arg.len = 0;
1963
1964         return arg.len;
1965 }
1966
1967 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1968 {
1969         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1970                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1971                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1972                       rt6_stats.fib_rt_cache,
1973                       atomic_read(&ip6_dst_ops.entries),
1974                       rt6_stats.fib_discarded_routes);
1975
1976         return 0;
1977 }
1978
1979 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1980 {
1981         return single_open(file, rt6_stats_seq_show, NULL);
1982 }
1983
1984 static struct file_operations rt6_stats_seq_fops = {
1985         .owner   = THIS_MODULE,
1986         .open    = rt6_stats_seq_open,
1987         .read    = seq_read,
1988         .llseek  = seq_lseek,
1989         .release = single_release,
1990 };
1991 #endif  /* CONFIG_PROC_FS */
1992
1993 #ifdef CONFIG_SYSCTL
1994
1995 static int flush_delay;
1996
1997 static
1998 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1999                               void __user *buffer, size_t *lenp, loff_t *ppos)
2000 {
2001         if (write) {
2002                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2003                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2004                 return 0;
2005         } else
2006                 return -EINVAL;
2007 }
2008
2009 ctl_table ipv6_route_table[] = {
2010         {
2011                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2012                 .procname       =       "flush",
2013                 .data           =       &flush_delay,
2014                 .maxlen         =       sizeof(int),
2015                 .mode           =       0200,
2016                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2017         },
2018         {
2019                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2020                 .procname       =       "gc_thresh",
2021                 .data           =       &ip6_dst_ops.gc_thresh,
2022                 .maxlen         =       sizeof(int),
2023                 .mode           =       0644,
2024                 .proc_handler   =       &proc_dointvec,
2025         },
2026         {
2027                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2028                 .procname       =       "max_size",
2029                 .data           =       &ip6_rt_max_size,
2030                 .maxlen         =       sizeof(int),
2031                 .mode           =       0644,
2032                 .proc_handler   =       &proc_dointvec,
2033         },
2034         {
2035                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2036                 .procname       =       "gc_min_interval",
2037                 .data           =       &ip6_rt_gc_min_interval,
2038                 .maxlen         =       sizeof(int),
2039                 .mode           =       0644,
2040                 .proc_handler   =       &proc_dointvec_jiffies,
2041                 .strategy       =       &sysctl_jiffies,
2042         },
2043         {
2044                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2045                 .procname       =       "gc_timeout",
2046                 .data           =       &ip6_rt_gc_timeout,
2047                 .maxlen         =       sizeof(int),
2048                 .mode           =       0644,
2049                 .proc_handler   =       &proc_dointvec_jiffies,
2050                 .strategy       =       &sysctl_jiffies,
2051         },
2052         {
2053                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2054                 .procname       =       "gc_interval",
2055                 .data           =       &ip6_rt_gc_interval,
2056                 .maxlen         =       sizeof(int),
2057                 .mode           =       0644,
2058                 .proc_handler   =       &proc_dointvec_jiffies,
2059                 .strategy       =       &sysctl_jiffies,
2060         },
2061         {
2062                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2063                 .procname       =       "gc_elasticity",
2064                 .data           =       &ip6_rt_gc_elasticity,
2065                 .maxlen         =       sizeof(int),
2066                 .mode           =       0644,
2067                 .proc_handler   =       &proc_dointvec_jiffies,
2068                 .strategy       =       &sysctl_jiffies,
2069         },
2070         {
2071                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2072                 .procname       =       "mtu_expires",
2073                 .data           =       &ip6_rt_mtu_expires,
2074                 .maxlen         =       sizeof(int),
2075                 .mode           =       0644,
2076                 .proc_handler   =       &proc_dointvec_jiffies,
2077                 .strategy       =       &sysctl_jiffies,
2078         },
2079         {
2080                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2081                 .procname       =       "min_adv_mss",
2082                 .data           =       &ip6_rt_min_advmss,
2083                 .maxlen         =       sizeof(int),
2084                 .mode           =       0644,
2085                 .proc_handler   =       &proc_dointvec_jiffies,
2086                 .strategy       =       &sysctl_jiffies,
2087         },
2088         {
2089                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2090                 .procname       =       "gc_min_interval_ms",
2091                 .data           =       &ip6_rt_gc_min_interval,
2092                 .maxlen         =       sizeof(int),
2093                 .mode           =       0644,
2094                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2095                 .strategy       =       &sysctl_ms_jiffies,
2096         },
2097         { .ctl_name = 0 }
2098 };
2099
2100 #endif
2101
2102 void __init ip6_route_init(void)
2103 {
2104         struct proc_dir_entry *p;
2105
2106         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2107                                                      sizeof(struct rt6_info),
2108                                                      0, SLAB_HWCACHE_ALIGN,
2109                                                      NULL, NULL);
2110         if (!ip6_dst_ops.kmem_cachep)
2111                 panic("cannot create ip6_dst_cache");
2112
2113         fib6_init();
2114 #ifdef  CONFIG_PROC_FS
2115         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2116         if (p)
2117                 p->owner = THIS_MODULE;
2118
2119         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2120 #endif
2121 #ifdef CONFIG_XFRM
2122         xfrm6_init();
2123 #endif
2124 }
2125
2126 void ip6_route_cleanup(void)
2127 {
2128 #ifdef CONFIG_PROC_FS
2129         proc_net_remove("ipv6_route");
2130         proc_net_remove("rt6_stats");
2131 #endif
2132 #ifdef CONFIG_XFRM
2133         xfrm6_fini();
2134 #endif
2135         rt6_ifdown(NULL);
2136         fib6_gc_cleanup();
2137         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2138 }