]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[IPV4]: Add LC-Trie FIB lookup algorithm.
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65
/*
 * Debug helpers. With RT6_DEBUG >= 3, RDBG(x) expands to "printk x" (so x
 * must be a parenthesised argument list) and RT6_TRACE() prints at
 * KERN_DEBUG level. Otherwise both expand to nothing / an empty statement.
 */
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73
74
/*
 * Tunables for the IPv6 route cache and its garbage collector.
 * Time-valued defaults are written as multiples of HZ.
 */
/* ip6_dst_gc() always runs, and reports pressure, above this entry count. */
75 static int ip6_rt_max_size = 4096;
/* Minimum spacing between GC runs while at or below ip6_rt_max_size. */
76 static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once entries < gc_thresh. */
77 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static; not referenced in the visible part of this file. */
78 int ip6_rt_gc_interval = 30*HZ;
/* Shift applied by ip6_dst_gc() to decay its expiry on every call. */
79 static int ip6_rt_gc_elasticity = 9;
/* Not referenced in the visible part of this file - presumably the lifetime
 * of a learned path MTU; verify at the use site. */
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). NOTE(review): 20 and 40 look like
 * the TCP and IPv6 header sizes - confirm. */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
/*
 * Forward declarations. These functions are referenced before their
 * definitions: by the ip6_dst_ops and ip6_null_entry initializers below
 * and by rt6_cow() (ip6_rt_copy).
 */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *,
88                                        struct net_device *dev, int how);
89 static int               ip6_dst_gc(void);
90
91 static int              ip6_pkt_discard(struct sk_buff *skb);
92 static int              ip6_pkt_discard_out(struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
/*
 * dst_ops instance for IPv6 routes: binds the dst callbacks to the ip6_*
 * handlers of this file. Entries are allocated through ip6_dst_alloc()
 * with entry_size sizeof(struct rt6_info); gc_thresh is the entry count
 * that ip6_dst_gc() compares against when it resets its expiry.
 */
96 static struct dst_ops ip6_dst_ops = {
97         .family                 =       AF_INET6,
98         .protocol               =       __constant_htons(ETH_P_IPV6),
99         .gc                     =       ip6_dst_gc,
100         .gc_thresh              =       1024,
101         .check                  =       ip6_dst_check,
102         .destroy                =       ip6_dst_destroy,
103         .ifdown                 =       ip6_dst_ifdown,
104         .negative_advice        =       ip6_negative_advice,
105         .link_failure           =       ip6_link_failure,
106         .update_pmtu            =       ip6_rt_update_pmtu,
107         .entry_size             =       sizeof(struct rt6_info),
108 };
109
/*
 * Statically allocated "no route" entry. It is the initial leaf of
 * ip6_routing_table and is what the lookup helpers in this file return
 * when nothing usable is found. It is a reject route (RTF_REJECT |
 * RTF_NONEXTHOP) on the loopback device with dst.error -ENETUNREACH, and
 * both its input and output handlers are the packet-discard functions.
 * Both reference counters start at 1.
 */
110 struct rt6_info ip6_null_entry = {
111         .u = {
112                 .dst = {
113                         .__refcnt       = ATOMIC_INIT(1),
114                         .__use          = 1,
115                         .dev            = &loopback_dev,
116                         .obsolete       = -1,
117                         .error          = -ENETUNREACH,
118                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
119                         .input          = ip6_pkt_discard,
120                         .output         = ip6_pkt_discard_out,
121                         .ops            = &ip6_dst_ops,
                        /* path points back at the entry itself */
122                         .path           = (struct dst_entry*)&ip6_null_entry,
123                 }
124         },
125         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
        /* largest possible u32 metric value */
126         .rt6i_metric    = ~(u32) 0,
127         .rt6i_ref       = ATOMIC_INIT(1),
128 };
129
/*
 * Root node of the IPv6 FIB tree that every lookup in this file starts
 * from. Its leaf is initialised to ip6_null_entry.
 */
130 struct fib6_node ip6_routing_table = {
131         .leaf           = &ip6_null_entry,
132         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133 };
134
135 /* Protects all the ip6 fib */
136
/* Taken with the _bh variants throughout this file. */
137 DEFINE_RWLOCK(rt6_lock);
138
139
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148         struct rt6_info *rt = (struct rt6_info *)dst;
149         struct inet6_dev *idev = rt->rt6i_idev;
150
151         if (idev != NULL) {
152                 rt->rt6i_idev = NULL;
153                 in6_dev_put(idev);
154         }       
155 }
156
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158                            int how)
159 {
160         struct rt6_info *rt = (struct rt6_info *)dst;
161         struct inet6_dev *idev = rt->rt6i_idev;
162
163         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165                 if (loopback_idev != NULL) {
166                         rt->rt6i_idev = loopback_idev;
167                         in6_dev_put(idev);
168                 }
169         }
170 }
171
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174         return (rt->rt6i_flags & RTF_EXPIRES &&
175                 time_after(jiffies, rt->rt6i_expires));
176 }
177
178 /*
179  *      Route lookup. Any rt6_lock is implied.
180  */
181
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183                                                     int oif,
184                                                     int strict)
185 {
186         struct rt6_info *local = NULL;
187         struct rt6_info *sprt;
188
189         if (oif) {
190                 for (sprt = rt; sprt; sprt = sprt->u.next) {
191                         struct net_device *dev = sprt->rt6i_dev;
192                         if (dev->ifindex == oif)
193                                 return sprt;
194                         if (dev->flags & IFF_LOOPBACK) {
195                                 if (sprt->rt6i_idev == NULL ||
196                                     sprt->rt6i_idev->dev->ifindex != oif) {
197                                         if (strict && oif)
198                                                 continue;
199                                         if (local && (!oif || 
200                                                       local->rt6i_idev->dev->ifindex == oif))
201                                                 continue;
202                                 }
203                                 local = sprt;
204                         }
205                 }
206
207                 if (local)
208                         return local;
209
210                 if (strict)
211                         return &ip6_null_entry;
212         }
213         return rt;
214 }
215
216 /*
217  *      pointer to the last default router chosen. BH is disabled locally.
218  */
219 static struct rt6_info *rt6_dflt_pointer;
220 static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224         spin_lock_bh(&rt6_dflt_lock);
225         if (rt == NULL || rt == rt6_dflt_pointer) {
226                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227                 rt6_dflt_pointer = NULL;
228         }
229         spin_unlock_bh(&rt6_dflt_lock);
230 }
231
232 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Choose a default router from the list starting at @rt (linked through
 * u.next) for output interface @oif (0 = any).
 *
 * Pass 1 scores every non-expired candidate that has a next-hop neighbour:
 *      +8  @oif is 0 or the route's device has ifindex @oif
 *      +4  the route is the previously chosen router (rt6_dflt_pointer)
 *      +3  neighbour is NUD_REACHABLE
 *      +2  neighbour is NUD_STALE, NUD_DELAY or NUD_PROBE
 *      +1  neighbour is NUD_NOARP or NUD_PERMANENT
 * Any other neighbour state, or no neighbour at all, disqualifies the
 * candidate. The highest score wins; a score >= 12 (only possible with both
 * the +8 and the +4 bonus) ends the scan at once.
 *
 * Pass 2 (only if pass 1 found nothing and a previous router is recorded)
 * round-robins: first the entries after rt6_dflt_pointer, then the entries
 * from @rt up to and including rt6_dflt_pointer, taking the first one with
 * dst.obsolete <= 0, dst.error == 0 and not expired.
 *
 * A route found by pass 1 or 2 is recorded in rt6_dflt_pointer.
 *
 * Last resort: the first non-expired RTF_DEFAULT route in the leaf list of
 * ip6_routing_table that fits @oif, else &ip6_null_entry. This result is
 * not recorded.
 *
 * No reference is taken on the returned route.
 */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234 {
235         struct rt6_info *match = NULL;
236         struct rt6_info *sprt;
237         int mpri = 0;
238
239         for (sprt = rt; sprt; sprt = sprt->u.next) {
240                 struct neighbour *neigh;
241                 int m = 0;
242
243                 if (!oif ||
244                     (sprt->rt6i_dev &&
245                      sprt->rt6i_dev->ifindex == oif))
246                         m += 8;
247
248                 if (rt6_check_expired(sprt))
249                         continue;
250
                /* NOTE(review): rt6_dflt_pointer is read here without
                 * holding rt6_dflt_lock. */
251                 if (sprt == rt6_dflt_pointer)
252                         m += 4;
253
254                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255                         read_lock_bh(&neigh->lock);
256                         switch (neigh->nud_state) {
257                         case NUD_REACHABLE:
258                                 m += 3;
259                                 break;
260
261                         case NUD_STALE:
262                         case NUD_DELAY:
263                         case NUD_PROBE:
264                                 m += 2;
265                                 break;
266
267                         case NUD_NOARP:
268                         case NUD_PERMANENT:
269                                 m += 1;
270                                 break;
271
272                         case NUD_INCOMPLETE:
273                         default:
                                /* unusable neighbour state: skip candidate */
274                                 read_unlock_bh(&neigh->lock);
275                                 continue;
276                         }
277                         read_unlock_bh(&neigh->lock);
278                 } else {
279                         continue;
280                 }
281
282                 if (m > mpri || m >= 12) {
283                         match = sprt;
284                         mpri = m;
285                         if (m >= 12) {
286                                 /* we choose the last default router if it
287                                  * is in (probably) reachable state.
288                                  * If route changed, we should do pmtu
289                                  * discovery. --yoshfuji
290                                  */
291                                 break;
292                         }
293                 }
294         }
295
296         spin_lock(&rt6_dflt_lock);
297         if (!match) {
298                 /*
299                  *      No default routers are known to be reachable.
300                  *      SHOULD round robin
301                  */
302                 if (rt6_dflt_pointer) {
                        /* first half: entries after the previous choice */
303                         for (sprt = rt6_dflt_pointer->u.next;
304                              sprt; sprt = sprt->u.next) {
305                                 if (sprt->u.dst.obsolete <= 0 &&
306                                     sprt->u.dst.error == 0 &&
307                                     !rt6_check_expired(sprt)) {
308                                         match = sprt;
309                                         break;
310                                 }
311                         }
                        /* second half: wrap around from the list head and
                         * stop once the previous choice has been examined */
312                         for (sprt = rt;
313                              !match && sprt;
314                              sprt = sprt->u.next) {
315                                 if (sprt->u.dst.obsolete <= 0 &&
316                                     sprt->u.dst.error == 0 &&
317                                     !rt6_check_expired(sprt)) {
318                                         match = sprt;
319                                         break;
320                                 }
321                                 if (sprt == rt6_dflt_pointer)
322                                         break;
323                         }
324                 }
325         }
326
327         if (match) {
328                 if (rt6_dflt_pointer != match)
329                         RT6_TRACE("changed default router: %p->%p\n",
330                                   rt6_dflt_pointer, match);
331                 rt6_dflt_pointer = match;
332         }
333         spin_unlock(&rt6_dflt_lock);
334
335         if (!match) {
336                 /*
337                  * Last Resort: if no default routers found, 
338                  * use addrconf default route.
339                  * We don't record this route.
340                  */
341                 for (sprt = ip6_routing_table.leaf;
342                      sprt; sprt = sprt->u.next) {
343                         if (!rt6_check_expired(sprt) &&
344                             (sprt->rt6i_flags & RTF_DEFAULT) &&
345                             (!oif ||
346                              (sprt->rt6i_dev &&
347                               sprt->rt6i_dev->ifindex == oif))) {
348                                 match = sprt;
349                                 break;
350                         }
351                 }
352                 if (!match) {
353                         /* no default route.  give up. */
354                         match = &ip6_null_entry;
355                 }
356         }
357
358         return match;
359 }
360
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362                             int oif, int strict)
363 {
364         struct fib6_node *fn;
365         struct rt6_info *rt;
366
367         read_lock_bh(&rt6_lock);
368         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369         rt = rt6_device_match(fn->leaf, oif, strict);
370         dst_hold(&rt->u.dst);
371         rt->u.dst.__use++;
372         read_unlock_bh(&rt6_lock);
373
374         rt->u.dst.lastuse = jiffies;
375         if (rt->u.dst.error == 0)
376                 return rt;
377         dst_release(&rt->u.dst);
378         return NULL;
379 }
380
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
388 {
389         int err;
390
391         write_lock_bh(&rt6_lock);
392         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393         write_unlock_bh(&rt6_lock);
394
395         return err;
396 }
397
398 /* No rt6_lock! If COW failed, the function returns dead route entry
399    with dst->error set to errno value.
400  */
401
402 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403                                 struct in6_addr *saddr)
404 {
405         int err;
406         struct rt6_info *rt;
407
408         /*
409          *      Clone the route.
410          */
411
412         rt = ip6_rt_copy(ort);
413
414         if (rt) {
415                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
416
417                 if (!(rt->rt6i_flags&RTF_GATEWAY))
418                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419
420                 rt->rt6i_dst.plen = 128;
421                 rt->rt6i_flags |= RTF_CACHE;
422                 rt->u.dst.flags |= DST_HOST;
423
424 #ifdef CONFIG_IPV6_SUBTREES
425                 if (rt->rt6i_src.plen && saddr) {
426                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427                         rt->rt6i_src.plen = 128;
428                 }
429 #endif
430
431                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432
433                 dst_hold(&rt->u.dst);
434
435                 err = ip6_ins_rt(rt, NULL, NULL);
436                 if (err == 0)
437                         return rt;
438
439                 rt->u.dst.error = err;
440
441                 return rt;
442         }
443         dst_hold(&ip6_null_entry.u.dst);
444         return &ip6_null_entry;
445 }
446
/*
 * BACKTRACK(): used by ip6_route_input() and ip6_route_output() after a
 * device match. It relies on the caller's locals fn, rt and strict and on
 * the caller's labels "restart" and "out".
 *
 * When the match produced ip6_null_entry and strict is set, walk up the
 * parent chain of fn: at the first node flagged RTN_RTINFO jump to
 * "restart" to retry with that node; on reaching a node flagged RTN_ROOT
 * take a reference on rt (ip6_null_entry) and jump to "out". If the parent
 * chain ends without either, execution continues after the macro.
 */
447 #define BACKTRACK() \
448 if (rt == &ip6_null_entry && strict) { \
449        while ((fn = fn->parent) != NULL) { \
450                 if (fn->fn_flags & RTN_ROOT) { \
451                         dst_hold(&rt->u.dst); \
452                         goto out; \
453                 } \
454                 if (fn->fn_flags & RTN_RTINFO) \
455                         goto restart; \
456         } \
457 }
458
459
/*
 * Route an incoming packet: find a route for the IPv6 destination/source
 * of @skb, matched against the receiving device's ifindex, and store it
 * (with a reference held) in skb->dst.
 *
 * "strict" is non-zero for multicast or link-local destinations. The FIB
 * lookup and device match run under read_lock_bh(&rt6_lock); BACKTRACK()
 * may jump to "restart" (retry at a parent node) or to "out".
 *
 * If the selected route has no next-hop neighbour and is not
 * RTF_NONEXTHOP, the lock is dropped and rt6_cow() clones a host route
 * for this destination. When the clone reports -EEXIST the whole lookup
 * is repeated, for at most 3 attempts in total.
 */
460 void ip6_route_input(struct sk_buff *skb)
461 {
462         struct fib6_node *fn;
463         struct rt6_info *rt;
464         int strict;
465         int attempts = 3;
466
467         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
468
469 relookup:
470         read_lock_bh(&rt6_lock);
471
472         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473                          &skb->nh.ipv6h->saddr);
474
475 restart:
476         rt = fn->leaf;
477
        /* leaf is already a cached host route: match, hold and finish */
478         if ((rt->rt6i_flags & RTF_CACHE)) {
479                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
480                 BACKTRACK();
481                 dst_hold(&rt->u.dst);
482                 goto out;
483         }
484
        /* note: the device match here is done with strict == 0 */
485         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
486         BACKTRACK();
487
488         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489                 struct rt6_info *nrt;
                /* keep rt alive across the unlocked rt6_cow() call */
490                 dst_hold(&rt->u.dst);
491                 read_unlock_bh(&rt6_lock);
492
493                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494                               &skb->nh.ipv6h->saddr);
495
496                 dst_release(&rt->u.dst);
497                 rt = nrt;
498
499                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500                         goto out2;
501
502                 /* Race condition! In the gap, when rt6_lock was
503                    released someone could insert this route.  Relookup.
504                 */
505                 dst_release(&rt->u.dst);
506                 goto relookup;
507         }
508         dst_hold(&rt->u.dst);
509
510 out:
511         read_unlock_bh(&rt6_lock);
512 out2:
        /* usage accounting is done after the lock has been released */
513         rt->u.dst.lastuse = jiffies;
514         rt->u.dst.__use++;
515         skb->dst = (struct dst_entry *) rt;
516 }
517
/*
 * Find the route for an outgoing flow @fl (fl6_dst, fl6_src, oif) and
 * return its dst_entry with a reference held. @sk is not used in the body.
 *
 * "strict" is non-zero for multicast or link-local destinations. The FIB
 * lookup runs under read_lock_bh(&rt6_lock); BACKTRACK() may jump to
 * "restart" (retry at a parent node) or to "out".
 *
 * Route selection:
 *  - RTF_CACHE leaf: device match, hold, done.
 *  - RTF_DEFAULT leaf with metric >= IP6_RT_PRIO_ADDRCONF: rt6_best_dflt().
 *  - RTF_DEFAULT leaf with a lower metric: the leaf is used as is.
 *  - anything else: device match against fl->oif.
 *
 * If the selected route has no next-hop neighbour and is not
 * RTF_NONEXTHOP, the lock is dropped and rt6_cow() clones a host route
 * for this destination. When the clone reports -EEXIST the whole lookup
 * is repeated, for at most 3 attempts in total.
 */
518 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
519 {
520         struct fib6_node *fn;
521         struct rt6_info *rt;
522         int strict;
523         int attempts = 3;
524
525         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
526
527 relookup:
528         read_lock_bh(&rt6_lock);
529
530         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
531
532 restart:
533         rt = fn->leaf;
534
535         if ((rt->rt6i_flags & RTF_CACHE)) {
536                 rt = rt6_device_match(rt, fl->oif, strict);
537                 BACKTRACK();
538                 dst_hold(&rt->u.dst);
539                 goto out;
540         }
541         if (rt->rt6i_flags & RTF_DEFAULT) {
542                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543                         rt = rt6_best_dflt(rt, fl->oif);
544         } else {
545                 rt = rt6_device_match(rt, fl->oif, strict);
546                 BACKTRACK();
547         }
548
549         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550                 struct rt6_info *nrt;
                /* keep rt alive across the unlocked rt6_cow() call */
551                 dst_hold(&rt->u.dst);
552                 read_unlock_bh(&rt6_lock);
553
554                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
555
556                 dst_release(&rt->u.dst);
557                 rt = nrt;
558
559                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
560                         goto out2;
561
562                 /* Race condition! In the gap, when rt6_lock was
563                    released someone could insert this route.  Relookup.
564                 */
565                 dst_release(&rt->u.dst);
566                 goto relookup;
567         }
568         dst_hold(&rt->u.dst);
569
570 out:
571         read_unlock_bh(&rt6_lock);
572 out2:
        /* usage accounting is done after the lock has been released */
573         rt->u.dst.lastuse = jiffies;
574         rt->u.dst.__use++;
575         return &rt->u.dst;
576 }
577
578
579 /*
580  *      Destination cache support functions
581  */
582
583 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
584 {
585         struct rt6_info *rt;
586
587         rt = (struct rt6_info *) dst;
588
589         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
590                 return dst;
591
592         return NULL;
593 }
594
595 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
596 {
597         struct rt6_info *rt = (struct rt6_info *) dst;
598
599         if (rt) {
600                 if (rt->rt6i_flags & RTF_CACHE)
601                         ip6_del_rt(rt, NULL, NULL);
602                 else
603                         dst_release(dst);
604         }
605         return NULL;
606 }
607
608 static void ip6_link_failure(struct sk_buff *skb)
609 {
610         struct rt6_info *rt;
611
612         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
613
614         rt = (struct rt6_info *) skb->dst;
615         if (rt) {
616                 if (rt->rt6i_flags&RTF_CACHE) {
617                         dst_set_expires(&rt->u.dst, 0);
618                         rt->rt6i_flags |= RTF_EXPIRES;
619                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
620                         rt->rt6i_node->fn_sernum = -1;
621         }
622 }
623
624 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
625 {
626         struct rt6_info *rt6 = (struct rt6_info*)dst;
627
628         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
629                 rt6->rt6i_flags |= RTF_MODIFIED;
630                 if (mtu < IPV6_MIN_MTU) {
631                         mtu = IPV6_MIN_MTU;
632                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
633                 }
634                 dst->metrics[RTAX_MTU-1] = mtu;
635         }
636 }
637
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones.
 */
638 /* Protected by rt6_lock.  */
639 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration: used by ndisc_dst_alloc() before its definition. */
640 static int ipv6_get_mtu(struct net_device *dev);
641
642 static inline unsigned int ipv6_advmss(unsigned int mtu)
643 {
644         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
645
646         if (mtu < ip6_rt_min_advmss)
647                 mtu = ip6_rt_min_advmss;
648
649         /*
650          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
651          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
652          * IPV6_MAXPLEN is also valid and means: "any MSS, 
653          * rely only on pmtu discovery"
654          */
655         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
656                 mtu = IPV6_MAXPLEN;
657         return mtu;
658 }
659
660 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
661                                   struct neighbour *neigh,
662                                   struct in6_addr *addr,
663                                   int (*output)(struct sk_buff *))
664 {
665         struct rt6_info *rt;
666         struct inet6_dev *idev = in6_dev_get(dev);
667
668         if (unlikely(idev == NULL))
669                 return NULL;
670
671         rt = ip6_dst_alloc();
672         if (unlikely(rt == NULL)) {
673                 in6_dev_put(idev);
674                 goto out;
675         }
676
677         dev_hold(dev);
678         if (neigh)
679                 neigh_hold(neigh);
680         else
681                 neigh = ndisc_get_neigh(dev, addr);
682
683         rt->rt6i_dev      = dev;
684         rt->rt6i_idev     = idev;
685         rt->rt6i_nexthop  = neigh;
686         atomic_set(&rt->u.dst.__refcnt, 1);
687         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
688         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
689         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
690         rt->u.dst.output  = output;
691
692 #if 0   /* there's no chance to use these for ndisc */
693         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
694                                 ? DST_HOST 
695                                 : 0;
696         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
697         rt->rt6i_dst.plen = 128;
698 #endif
699
700         write_lock_bh(&rt6_lock);
701         rt->u.dst.next = ndisc_dst_gc_list;
702         ndisc_dst_gc_list = &rt->u.dst;
703         write_unlock_bh(&rt6_lock);
704
705         fib6_force_start_gc();
706
707 out:
708         return (struct dst_entry *)rt;
709 }
710
711 int ndisc_dst_gc(int *more)
712 {
713         struct dst_entry *dst, *next, **pprev;
714         int freed;
715
716         next = NULL;
717         pprev = &ndisc_dst_gc_list;
718         freed = 0;
719         while ((dst = *pprev) != NULL) {
720                 if (!atomic_read(&dst->__refcnt)) {
721                         *pprev = dst->next;
722                         dst_free(dst);
723                         freed++;
724                 } else {
725                         pprev = &dst->next;
726                         (*more)++;
727                 }
728         }
729
730         return freed;
731 }
732
733 static int ip6_dst_gc(void)
734 {
735         static unsigned expire = 30*HZ;
736         static unsigned long last_gc;
737         unsigned long now = jiffies;
738
739         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
740             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
741                 goto out;
742
743         expire++;
744         fib6_run_gc(expire);
745         last_gc = now;
746         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
747                 expire = ip6_rt_gc_timeout>>1;
748
749 out:
750         expire -= expire>>ip6_rt_gc_elasticity;
751         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
752 }
753
754 /* Clean host part of a prefix. Not necessary in radix tree,
755    but results in cleaner routing tables.
756
757    Remove it only when all the things will work!
758  */
759
760 static int ipv6_get_mtu(struct net_device *dev)
761 {
762         int mtu = IPV6_MIN_MTU;
763         struct inet6_dev *idev;
764
765         idev = in6_dev_get(dev);
766         if (idev) {
767                 mtu = idev->cnf.mtu6;
768                 in6_dev_put(idev);
769         }
770         return mtu;
771 }
772
773 int ipv6_get_hoplimit(struct net_device *dev)
774 {
775         int hoplimit = ipv6_devconf.hop_limit;
776         struct inet6_dev *idev;
777
778         idev = in6_dev_get(dev);
779         if (idev) {
780                 hoplimit = idev->cnf.hop_limit;
781                 in6_dev_put(idev);
782         }
783         return hoplimit;
784 }
785
786 /*
787  *
788  */
789
790 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
791 {
792         int err;
793         struct rtmsg *r;
794         struct rtattr **rta;
795         struct rt6_info *rt = NULL;
796         struct net_device *dev = NULL;
797         struct inet6_dev *idev = NULL;
798         int addr_type;
799
800         rta = (struct rtattr **) _rtattr;
801
802         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
803                 return -EINVAL;
804 #ifndef CONFIG_IPV6_SUBTREES
805         if (rtmsg->rtmsg_src_len)
806                 return -EINVAL;
807 #endif
808         if (rtmsg->rtmsg_ifindex) {
809                 err = -ENODEV;
810                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
811                 if (!dev)
812                         goto out;
813                 idev = in6_dev_get(dev);
814                 if (!idev)
815                         goto out;
816         }
817
818         if (rtmsg->rtmsg_metric == 0)
819                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
820
821         rt = ip6_dst_alloc();
822
823         if (rt == NULL) {
824                 err = -ENOMEM;
825                 goto out;
826         }
827
828         rt->u.dst.obsolete = -1;
829         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
830         if (nlh && (r = NLMSG_DATA(nlh))) {
831                 rt->rt6i_protocol = r->rtm_protocol;
832         } else {
833                 rt->rt6i_protocol = RTPROT_BOOT;
834         }
835
836         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
837
838         if (addr_type & IPV6_ADDR_MULTICAST)
839                 rt->u.dst.input = ip6_mc_input;
840         else
841                 rt->u.dst.input = ip6_forward;
842
843         rt->u.dst.output = ip6_output;
844
845         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
846                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
847         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
848         if (rt->rt6i_dst.plen == 128)
849                rt->u.dst.flags = DST_HOST;
850
851 #ifdef CONFIG_IPV6_SUBTREES
852         ipv6_addr_prefix(&rt->rt6i_src.addr, 
853                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
854         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
855 #endif
856
857         rt->rt6i_metric = rtmsg->rtmsg_metric;
858
859         /* We cannot add true routes via loopback here,
860            they would result in kernel looping; promote them to reject routes
861          */
862         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
863             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
864                 /* hold loopback dev/idev if we haven't done so. */
865                 if (dev != &loopback_dev) {
866                         if (dev) {
867                                 dev_put(dev);
868                                 in6_dev_put(idev);
869                         }
870                         dev = &loopback_dev;
871                         dev_hold(dev);
872                         idev = in6_dev_get(dev);
873                         if (!idev) {
874                                 err = -ENODEV;
875                                 goto out;
876                         }
877                 }
878                 rt->u.dst.output = ip6_pkt_discard_out;
879                 rt->u.dst.input = ip6_pkt_discard;
880                 rt->u.dst.error = -ENETUNREACH;
881                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
882                 goto install_route;
883         }
884
885         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
886                 struct in6_addr *gw_addr;
887                 int gwa_type;
888
889                 gw_addr = &rtmsg->rtmsg_gateway;
890                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
891                 gwa_type = ipv6_addr_type(gw_addr);
892
893                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
894                         struct rt6_info *grt;
895
896                         /* IPv6 strictly inhibits using not link-local
897                            addresses as nexthop address.
898                            Otherwise, router will not able to send redirects.
899                            It is very good, but in some (rare!) circumstances
900                            (SIT, PtP, NBMA NOARP links) it is handy to allow
901                            some exceptions. --ANK
902                          */
903                         err = -EINVAL;
904                         if (!(gwa_type&IPV6_ADDR_UNICAST))
905                                 goto out;
906
907                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
908
909                         err = -EHOSTUNREACH;
910                         if (grt == NULL)
911                                 goto out;
912                         if (dev) {
913                                 if (dev != grt->rt6i_dev) {
914                                         dst_release(&grt->u.dst);
915                                         goto out;
916                                 }
917                         } else {
918                                 dev = grt->rt6i_dev;
919                                 idev = grt->rt6i_idev;
920                                 dev_hold(dev);
921                                 in6_dev_hold(grt->rt6i_idev);
922                         }
923                         if (!(grt->rt6i_flags&RTF_GATEWAY))
924                                 err = 0;
925                         dst_release(&grt->u.dst);
926
927                         if (err)
928                                 goto out;
929                 }
930                 err = -EINVAL;
931                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
932                         goto out;
933         }
934
935         err = -ENODEV;
936         if (dev == NULL)
937                 goto out;
938
939         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
940                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
941                 if (IS_ERR(rt->rt6i_nexthop)) {
942                         err = PTR_ERR(rt->rt6i_nexthop);
943                         rt->rt6i_nexthop = NULL;
944                         goto out;
945                 }
946         }
947
948         rt->rt6i_flags = rtmsg->rtmsg_flags;
949
950 install_route:
951         if (rta && rta[RTA_METRICS-1]) {
952                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
953                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
954
955                 while (RTA_OK(attr, attrlen)) {
956                         unsigned flavor = attr->rta_type;
957                         if (flavor) {
958                                 if (flavor > RTAX_MAX) {
959                                         err = -EINVAL;
960                                         goto out;
961                                 }
962                                 rt->u.dst.metrics[flavor-1] =
963                                         *(u32 *)RTA_DATA(attr);
964                         }
965                         attr = RTA_NEXT(attr, attrlen);
966                 }
967         }
968
969         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
970                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
971         if (!rt->u.dst.metrics[RTAX_MTU-1])
972                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
973         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
974                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
975         rt->u.dst.dev = dev;
976         rt->rt6i_idev = idev;
977         return ip6_ins_rt(rt, nlh, _rtattr);
978
979 out:
980         if (dev)
981                 dev_put(dev);
982         if (idev)
983                 in6_dev_put(idev);
984         if (rt)
985                 dst_free((struct dst_entry *) rt);
986         return err;
987 }
988
989 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
990 {
991         int err;
992
993         write_lock_bh(&rt6_lock);
994
995         rt6_reset_dflt_pointer(NULL);
996
997         err = fib6_del(rt, nlh, _rtattr);
998         dst_release(&rt->u.dst);
999
1000         write_unlock_bh(&rt6_lock);
1001
1002         return err;
1003 }
1004
1005 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1006 {
1007         struct fib6_node *fn;
1008         struct rt6_info *rt;
1009         int err = -ESRCH;
1010
1011         read_lock_bh(&rt6_lock);
1012
1013         fn = fib6_locate(&ip6_routing_table,
1014                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1015                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1016         
1017         if (fn) {
1018                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1019                         if (rtmsg->rtmsg_ifindex &&
1020                             (rt->rt6i_dev == NULL ||
1021                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1022                                 continue;
1023                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1024                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1025                                 continue;
1026                         if (rtmsg->rtmsg_metric &&
1027                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1028                                 continue;
1029                         dst_hold(&rt->u.dst);
1030                         read_unlock_bh(&rt6_lock);
1031
1032                         return ip6_del_rt(rt, nlh, _rtattr);
1033                 }
1034         }
1035         read_unlock_bh(&rt6_lock);
1036
1037         return err;
1038 }
1039
1040 /*
1041  *      Handle redirects
1042  */
1043 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1044                   struct neighbour *neigh, u8 *lladdr, int on_link)
1045 {
1046         struct rt6_info *rt, *nrt;
1047
1048         /* Locate old route to this destination. */
1049         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1050
1051         if (rt == NULL)
1052                 return;
1053
1054         if (neigh->dev != rt->rt6i_dev)
1055                 goto out;
1056
1057         /*
1058          * Current route is on-link; redirect is always invalid.
1059          * 
1060          * Seems, previous statement is not true. It could
1061          * be node, which looks for us as on-link (f.e. proxy ndisc)
1062          * But then router serving it might decide, that we should
1063          * know truth 8)8) --ANK (980726).
1064          */
1065         if (!(rt->rt6i_flags&RTF_GATEWAY))
1066                 goto out;
1067
1068         /*
1069          *      RFC 2461 specifies that redirects should only be
1070          *      accepted if they come from the nexthop to the target.
1071          *      Due to the way default routers are chosen, this notion
1072          *      is a bit fuzzy and one might need to check all default
1073          *      routers.
1074          */
1075         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1076                 if (rt->rt6i_flags & RTF_DEFAULT) {
1077                         struct rt6_info *rt1;
1078
1079                         read_lock(&rt6_lock);
1080                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1081                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1082                                         dst_hold(&rt1->u.dst);
1083                                         dst_release(&rt->u.dst);
1084                                         read_unlock(&rt6_lock);
1085                                         rt = rt1;
1086                                         goto source_ok;
1087                                 }
1088                         }
1089                         read_unlock(&rt6_lock);
1090                 }
1091                 if (net_ratelimit())
1092                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1093                                "for redirect target\n");
1094                 goto out;
1095         }
1096
1097 source_ok:
1098
1099         /*
1100          *      We have finally decided to accept it.
1101          */
1102
1103         neigh_update(neigh, lladdr, NUD_STALE, 
1104                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1105                      NEIGH_UPDATE_F_OVERRIDE|
1106                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1107                                      NEIGH_UPDATE_F_ISROUTER))
1108                      );
1109
1110         /*
1111          * Redirect received -> path was valid.
1112          * Look, redirects are sent only in response to data packets,
1113          * so that this nexthop apparently is reachable. --ANK
1114          */
1115         dst_confirm(&rt->u.dst);
1116
1117         /* Duplicate redirect: silently ignore. */
1118         if (neigh == rt->u.dst.neighbour)
1119                 goto out;
1120
1121         nrt = ip6_rt_copy(rt);
1122         if (nrt == NULL)
1123                 goto out;
1124
1125         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1126         if (on_link)
1127                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1128
1129         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1130         nrt->rt6i_dst.plen = 128;
1131         nrt->u.dst.flags |= DST_HOST;
1132
1133         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1134         nrt->rt6i_nexthop = neigh_clone(neigh);
1135         /* Reset pmtu, it may be better */
1136         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1137         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1138
1139         if (ip6_ins_rt(nrt, NULL, NULL))
1140                 goto out;
1141
1142         if (rt->rt6i_flags&RTF_CACHE) {
1143                 ip6_del_rt(rt, NULL, NULL);
1144                 return;
1145         }
1146
1147 out:
1148         dst_release(&rt->u.dst);
1149         return;
1150 }
1151
1152 /*
1153  *      Handle ICMP "packet too big" messages
1154  *      i.e. Path MTU discovery
1155  */
1156
1157 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1158                         struct net_device *dev, u32 pmtu)
1159 {
1160         struct rt6_info *rt, *nrt;
1161         int allfrag = 0;
1162
1163         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1164         if (rt == NULL)
1165                 return;
1166
1167         if (pmtu >= dst_mtu(&rt->u.dst))
1168                 goto out;
1169
1170         if (pmtu < IPV6_MIN_MTU) {
1171                 /*
1172                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1173                  * MTU (1280) and a fragment header should always be included
1174                  * after a node receiving Too Big message reporting PMTU is
1175                  * less than the IPv6 Minimum Link MTU.
1176                  */
1177                 pmtu = IPV6_MIN_MTU;
1178                 allfrag = 1;
1179         }
1180
1181         /* New mtu received -> path was valid.
1182            They are sent only in response to data packets,
1183            so that this nexthop apparently is reachable. --ANK
1184          */
1185         dst_confirm(&rt->u.dst);
1186
1187         /* Host route. If it is static, it would be better
1188            not to override it, but add new one, so that
1189            when cache entry will expire old pmtu
1190            would return automatically.
1191          */
1192         if (rt->rt6i_flags & RTF_CACHE) {
1193                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1194                 if (allfrag)
1195                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1196                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1197                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1198                 goto out;
1199         }
1200
1201         /* Network route.
1202            Two cases are possible:
1203            1. It is connected route. Action: COW
1204            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1205          */
1206         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1207                 nrt = rt6_cow(rt, daddr, saddr);
1208                 if (!nrt->u.dst.error) {
1209                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1210                         if (allfrag)
1211                                 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1212                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1213                            happened within 5 mins, the recommended timer is 10 mins.
1214                            Here this route expiration time is set to ip6_rt_mtu_expires
1215                            which is 10 mins. After 10 mins the decreased pmtu is expired
1216                            and detecting PMTU increase will be automatically happened.
1217                          */
1218                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1219                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1220                 }
1221                 dst_release(&nrt->u.dst);
1222         } else {
1223                 nrt = ip6_rt_copy(rt);
1224                 if (nrt == NULL)
1225                         goto out;
1226                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1227                 nrt->rt6i_dst.plen = 128;
1228                 nrt->u.dst.flags |= DST_HOST;
1229                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1230                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1231                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1232                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1233                 if (allfrag)
1234                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1235                 ip6_ins_rt(nrt, NULL, NULL);
1236         }
1237
1238 out:
1239         dst_release(&rt->u.dst);
1240 }
1241
1242 /*
1243  *      Misc support functions
1244  */
1245
1246 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1247 {
1248         struct rt6_info *rt = ip6_dst_alloc();
1249
1250         if (rt) {
1251                 rt->u.dst.input = ort->u.dst.input;
1252                 rt->u.dst.output = ort->u.dst.output;
1253
1254                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255                 rt->u.dst.dev = ort->u.dst.dev;
1256                 if (rt->u.dst.dev)
1257                         dev_hold(rt->u.dst.dev);
1258                 rt->rt6i_idev = ort->rt6i_idev;
1259                 if (rt->rt6i_idev)
1260                         in6_dev_hold(rt->rt6i_idev);
1261                 rt->u.dst.lastuse = jiffies;
1262                 rt->rt6i_expires = 0;
1263
1264                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266                 rt->rt6i_metric = 0;
1267
1268                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269 #ifdef CONFIG_IPV6_SUBTREES
1270                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1271 #endif
1272         }
1273         return rt;
1274 }
1275
1276 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1277 {       
1278         struct rt6_info *rt;
1279         struct fib6_node *fn;
1280
1281         fn = &ip6_routing_table;
1282
1283         write_lock_bh(&rt6_lock);
1284         for (rt = fn->leaf; rt; rt=rt->u.next) {
1285                 if (dev == rt->rt6i_dev &&
1286                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1287                         break;
1288         }
1289         if (rt)
1290                 dst_hold(&rt->u.dst);
1291         write_unlock_bh(&rt6_lock);
1292         return rt;
1293 }
1294
1295 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296                                      struct net_device *dev)
1297 {
1298         struct in6_rtmsg rtmsg;
1299
1300         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303         rtmsg.rtmsg_metric = 1024;
1304         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1305
1306         rtmsg.rtmsg_ifindex = dev->ifindex;
1307
1308         ip6_route_add(&rtmsg, NULL, NULL);
1309         return rt6_get_dflt_router(gwaddr, dev);
1310 }
1311
1312 void rt6_purge_dflt_routers(void)
1313 {
1314         struct rt6_info *rt;
1315
1316 restart:
1317         read_lock_bh(&rt6_lock);
1318         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320                         dst_hold(&rt->u.dst);
1321
1322                         rt6_reset_dflt_pointer(NULL);
1323
1324                         read_unlock_bh(&rt6_lock);
1325
1326                         ip6_del_rt(rt, NULL, NULL);
1327
1328                         goto restart;
1329                 }
1330         }
1331         read_unlock_bh(&rt6_lock);
1332 }
1333
1334 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1335 {
1336         struct in6_rtmsg rtmsg;
1337         int err;
1338
1339         switch(cmd) {
1340         case SIOCADDRT:         /* Add a route */
1341         case SIOCDELRT:         /* Delete a route */
1342                 if (!capable(CAP_NET_ADMIN))
1343                         return -EPERM;
1344                 err = copy_from_user(&rtmsg, arg,
1345                                      sizeof(struct in6_rtmsg));
1346                 if (err)
1347                         return -EFAULT;
1348                         
1349                 rtnl_lock();
1350                 switch (cmd) {
1351                 case SIOCADDRT:
1352                         err = ip6_route_add(&rtmsg, NULL, NULL);
1353                         break;
1354                 case SIOCDELRT:
1355                         err = ip6_route_del(&rtmsg, NULL, NULL);
1356                         break;
1357                 default:
1358                         err = -EINVAL;
1359                 }
1360                 rtnl_unlock();
1361
1362                 return err;
1363         };
1364
1365         return -EINVAL;
1366 }
1367
1368 /*
1369  *      Drop the packet on the floor
1370  */
1371
1372 int ip6_pkt_discard(struct sk_buff *skb)
1373 {
1374         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1376         kfree_skb(skb);
1377         return 0;
1378 }
1379
1380 int ip6_pkt_discard_out(struct sk_buff *skb)
1381 {
1382         skb->dev = skb->dst->dev;
1383         return ip6_pkt_discard(skb);
1384 }
1385
1386 /*
1387  *      Allocate a dst for local (unicast / anycast) address.
1388  */
1389
1390 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391                                     const struct in6_addr *addr,
1392                                     int anycast)
1393 {
1394         struct rt6_info *rt = ip6_dst_alloc();
1395
1396         if (rt == NULL)
1397                 return ERR_PTR(-ENOMEM);
1398
1399         dev_hold(&loopback_dev);
1400         in6_dev_hold(idev);
1401
1402         rt->u.dst.flags = DST_HOST;
1403         rt->u.dst.input = ip6_input;
1404         rt->u.dst.output = ip6_output;
1405         rt->rt6i_dev = &loopback_dev;
1406         rt->rt6i_idev = idev;
1407         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1409         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1410         rt->u.dst.obsolete = -1;
1411
1412         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1413         if (!anycast)
1414                 rt->rt6i_flags |= RTF_LOCAL;
1415         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416         if (rt->rt6i_nexthop == NULL) {
1417                 dst_free((struct dst_entry *) rt);
1418                 return ERR_PTR(-ENOMEM);
1419         }
1420
1421         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422         rt->rt6i_dst.plen = 128;
1423
1424         atomic_set(&rt->u.dst.__refcnt, 1);
1425
1426         return rt;
1427 }
1428
1429 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1430 {
1431         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432             rt != &ip6_null_entry) {
1433                 RT6_TRACE("deleted by ifdown %p\n", rt);
1434                 return -1;
1435         }
1436         return 0;
1437 }
1438
1439 void rt6_ifdown(struct net_device *dev)
1440 {
1441         write_lock_bh(&rt6_lock);
1442         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443         write_unlock_bh(&rt6_lock);
1444 }
1445
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* its new MTU */
};
1451
1452 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1453 {
1454         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455         struct inet6_dev *idev;
1456
1457         /* In IPv6 pmtu discovery is not optional,
1458            so that RTAX_MTU lock cannot disable it.
1459            We still use this lock to block changes
1460            caused by addrconf/ndisc.
1461         */
1462
1463         idev = __in6_dev_get(arg->dev);
1464         if (idev == NULL)
1465                 return 0;
1466
1467         /* For administrative MTU increase, there is no way to discover
1468            IPv6 PMTU increase, so PMTU increase should be updated here.
1469            Since RFC 1981 doesn't include administrative MTU increase
1470            update PMTU increase is a MUST. (i.e. jumbo frame)
1471          */
1472         /*
1473            If new MTU is less than route PMTU, this new MTU will be the
1474            lowest MTU in the path, update the route PMTU to reflect PMTU
1475            decreases; if new MTU is greater than route PMTU, and the
1476            old MTU is the lowest MTU in the path, update the route PMTU
1477            to reflect the increase. In this case if the other nodes' MTU
1478            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1479            PMTU discouvery.
1480          */
1481         if (rt->rt6i_dev == arg->dev &&
1482             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483             (dst_mtu(&rt->u.dst) > arg->mtu ||
1484              (dst_mtu(&rt->u.dst) < arg->mtu &&
1485               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1486                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1488         return 0;
1489 }
1490
1491 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1492 {
1493         struct rt6_mtu_change_arg arg;
1494
1495         arg.dev = dev;
1496         arg.mtu = mtu;
1497         read_lock_bh(&rt6_lock);
1498         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499         read_unlock_bh(&rt6_lock);
1500 }
1501
1502 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503                               struct in6_rtmsg *rtmsg)
1504 {
1505         memset(rtmsg, 0, sizeof(*rtmsg));
1506
1507         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508         rtmsg->rtmsg_src_len = r->rtm_src_len;
1509         rtmsg->rtmsg_flags = RTF_UP;
1510         if (r->rtm_type == RTN_UNREACHABLE)
1511                 rtmsg->rtmsg_flags |= RTF_REJECT;
1512
1513         if (rta[RTA_GATEWAY-1]) {
1514                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1515                         return -EINVAL;
1516                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1518         }
1519         if (rta[RTA_DST-1]) {
1520                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1521                         return -EINVAL;
1522                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1523         }
1524         if (rta[RTA_SRC-1]) {
1525                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1526                         return -EINVAL;
1527                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1528         }
1529         if (rta[RTA_OIF-1]) {
1530                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1531                         return -EINVAL;
1532                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1533         }
1534         if (rta[RTA_PRIORITY-1]) {
1535                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1536                         return -EINVAL;
1537                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1538         }
1539         return 0;
1540 }
1541
1542 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543 {
1544         struct rtmsg *r = NLMSG_DATA(nlh);
1545         struct in6_rtmsg rtmsg;
1546
1547         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548                 return -EINVAL;
1549         return ip6_route_del(&rtmsg, nlh, arg);
1550 }
1551
1552 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553 {
1554         struct rtmsg *r = NLMSG_DATA(nlh);
1555         struct in6_rtmsg rtmsg;
1556
1557         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558                 return -EINVAL;
1559         return ip6_route_add(&rtmsg, nlh, arg);
1560 }
1561
/* Per-call state handed to rt6_dump_route() through the walker's args. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump being served */
};
1567
1568 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1569                          struct in6_addr *dst,
1570                          struct in6_addr *src,
1571                          int iif,
1572                          int type, u32 pid, u32 seq,
1573                          struct nlmsghdr *in_nlh, int prefix,
1574                          unsigned int flags)
1575 {
1576         struct rtmsg *rtm;
1577         struct nlmsghdr  *nlh;
1578         unsigned char    *b = skb->tail;
1579         struct rta_cacheinfo ci;
1580
1581         if (prefix) {   /* user wants prefix routes only */
1582                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1583                         /* success since this is not a prefix route */
1584                         return 1;
1585                 }
1586         }
1587
1588         if (!pid && in_nlh) {
1589                 pid = in_nlh->nlmsg_pid;
1590         }
1591
1592         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1593         rtm = NLMSG_DATA(nlh);
1594         rtm->rtm_family = AF_INET6;
1595         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1596         rtm->rtm_src_len = rt->rt6i_src.plen;
1597         rtm->rtm_tos = 0;
1598         rtm->rtm_table = RT_TABLE_MAIN;
1599         if (rt->rt6i_flags&RTF_REJECT)
1600                 rtm->rtm_type = RTN_UNREACHABLE;
1601         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1602                 rtm->rtm_type = RTN_LOCAL;
1603         else
1604                 rtm->rtm_type = RTN_UNICAST;
1605         rtm->rtm_flags = 0;
1606         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1607         rtm->rtm_protocol = rt->rt6i_protocol;
1608         if (rt->rt6i_flags&RTF_DYNAMIC)
1609                 rtm->rtm_protocol = RTPROT_REDIRECT;
1610         else if (rt->rt6i_flags & RTF_ADDRCONF)
1611                 rtm->rtm_protocol = RTPROT_KERNEL;
1612         else if (rt->rt6i_flags&RTF_DEFAULT)
1613                 rtm->rtm_protocol = RTPROT_RA;
1614
1615         if (rt->rt6i_flags&RTF_CACHE)
1616                 rtm->rtm_flags |= RTM_F_CLONED;
1617
1618         if (dst) {
1619                 RTA_PUT(skb, RTA_DST, 16, dst);
1620                 rtm->rtm_dst_len = 128;
1621         } else if (rtm->rtm_dst_len)
1622                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1623 #ifdef CONFIG_IPV6_SUBTREES
1624         if (src) {
1625                 RTA_PUT(skb, RTA_SRC, 16, src);
1626                 rtm->rtm_src_len = 128;
1627         } else if (rtm->rtm_src_len)
1628                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1629 #endif
1630         if (iif)
1631                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1632         else if (dst) {
1633                 struct in6_addr saddr_buf;
1634                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1635                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1636         }
1637         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1638                 goto rtattr_failure;
1639         if (rt->u.dst.neighbour)
1640                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1641         if (rt->u.dst.dev)
1642                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1643         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1644         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1645         if (rt->rt6i_expires)
1646                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1647         else
1648                 ci.rta_expires = 0;
1649         ci.rta_used = rt->u.dst.__use;
1650         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1651         ci.rta_error = rt->u.dst.error;
1652         ci.rta_id = 0;
1653         ci.rta_ts = 0;
1654         ci.rta_tsage = 0;
1655         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1656         nlh->nlmsg_len = skb->tail - b;
1657         return skb->len;
1658
1659 nlmsg_failure:
1660 rtattr_failure:
1661         skb_trim(skb, b - skb->data);
1662         return -1;
1663 }
1664
1665 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1666 {
1667         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1668         int prefix;
1669
1670         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1671                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1672                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1673         } else
1674                 prefix = 0;
1675
1676         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1677                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1678                      NULL, prefix, NLM_F_MULTI);
1679 }
1680
1681 static int fib6_dump_node(struct fib6_walker_t *w)
1682 {
1683         int res;
1684         struct rt6_info *rt;
1685
1686         for (rt = w->leaf; rt; rt = rt->u.next) {
1687                 res = rt6_dump_route(rt, w->args);
1688                 if (res < 0) {
1689                         /* Frame is full, suspend walking */
1690                         w->leaf = rt;
1691                         return 1;
1692                 }
1693                 BUG_TRAP(res!=0);
1694         }
1695         w->leaf = NULL;
1696         return 0;
1697 }
1698
1699 static void fib6_dump_end(struct netlink_callback *cb)
1700 {
1701         struct fib6_walker_t *w = (void*)cb->args[0];
1702
1703         if (w) {
1704                 cb->args[0] = 0;
1705                 fib6_walker_unlink(w);
1706                 kfree(w);
1707         }
1708         if (cb->args[1]) {
1709                 cb->done = (void*)cb->args[1];
1710                 cb->args[1] = 0;
1711         }
1712 }
1713
1714 static int fib6_dump_done(struct netlink_callback *cb)
1715 {
1716         fib6_dump_end(cb);
1717         return cb->done(cb);
1718 }
1719
1720 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1721 {
1722         struct rt6_rtnl_dump_arg arg;
1723         struct fib6_walker_t *w;
1724         int res;
1725
1726         arg.skb = skb;
1727         arg.cb = cb;
1728
1729         w = (void*)cb->args[0];
1730         if (w == NULL) {
1731                 /* New dump:
1732                  * 
1733                  * 1. hook callback destructor.
1734                  */
1735                 cb->args[1] = (long)cb->done;
1736                 cb->done = fib6_dump_done;
1737
1738                 /*
1739                  * 2. allocate and initialize walker.
1740                  */
1741                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1742                 if (w == NULL)
1743                         return -ENOMEM;
1744                 RT6_TRACE("dump<%p", w);
1745                 memset(w, 0, sizeof(*w));
1746                 w->root = &ip6_routing_table;
1747                 w->func = fib6_dump_node;
1748                 w->args = &arg;
1749                 cb->args[0] = (long)w;
1750                 read_lock_bh(&rt6_lock);
1751                 res = fib6_walk(w);
1752                 read_unlock_bh(&rt6_lock);
1753         } else {
1754                 w->args = &arg;
1755                 read_lock_bh(&rt6_lock);
1756                 res = fib6_walk_continue(w);
1757                 read_unlock_bh(&rt6_lock);
1758         }
1759 #if RT6_DEBUG >= 3
1760         if (res <= 0 && skb->len == 0)
1761                 RT6_TRACE("%p>dump end\n", w);
1762 #endif
1763         res = res < 0 ? res : skb->len;
1764         /* res < 0 is an error. (really, impossible)
1765            res == 0 means that dump is complete, but skb still can contain data.
1766            res > 0 dump is not complete, but frame is full.
1767          */
1768         /* Destroy walker, if dump of this table is complete. */
1769         if (res <= 0)
1770                 fib6_dump_end(cb);
1771         return res;
1772 }
1773
1774 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1775 {
1776         struct rtattr **rta = arg;
1777         int iif = 0;
1778         int err = -ENOBUFS;
1779         struct sk_buff *skb;
1780         struct flowi fl;
1781         struct rt6_info *rt;
1782
1783         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1784         if (skb == NULL)
1785                 goto out;
1786
1787         /* Reserve room for dummy headers, this skb can pass
1788            through good chunk of routing engine.
1789          */
1790         skb->mac.raw = skb->data;
1791         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1792
1793         memset(&fl, 0, sizeof(fl));
1794         if (rta[RTA_SRC-1])
1795                 ipv6_addr_copy(&fl.fl6_src,
1796                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1797         if (rta[RTA_DST-1])
1798                 ipv6_addr_copy(&fl.fl6_dst,
1799                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1800
1801         if (rta[RTA_IIF-1])
1802                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1803
1804         if (iif) {
1805                 struct net_device *dev;
1806                 dev = __dev_get_by_index(iif);
1807                 if (!dev) {
1808                         err = -ENODEV;
1809                         goto out_free;
1810                 }
1811         }
1812
1813         fl.oif = 0;
1814         if (rta[RTA_OIF-1])
1815                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1816
1817         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1818
1819         skb->dst = &rt->u.dst;
1820
1821         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1822         err = rt6_fill_node(skb, rt, 
1823                             &fl.fl6_dst, &fl.fl6_src,
1824                             iif,
1825                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1826                             nlh->nlmsg_seq, nlh, 0, 0);
1827         if (err < 0) {
1828                 err = -EMSGSIZE;
1829                 goto out_free;
1830         }
1831
1832         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1833         if (err > 0)
1834                 err = 0;
1835 out:
1836         return err;
1837 out_free:
1838         kfree_skb(skb);
1839         goto out;       
1840 }
1841
1842 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1843 {
1844         struct sk_buff *skb;
1845         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1846
1847         skb = alloc_skb(size, gfp_any());
1848         if (!skb) {
1849                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1850                 return;
1851         }
1852         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0, 0) < 0) {
1853                 kfree_skb(skb);
1854                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1855                 return;
1856         }
1857         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1858         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1859 }
1860
1861 /*
1862  *      /proc
1863  */
1864
1865 #ifdef CONFIG_PROC_FS
1866
/*
 * Nominal size of one text record in the ipv6_route proc output; used
 * to translate a read offset into a number of records to skip.
 * The 32s are the hex digits of an address and the 4s the " xx "
 * prefix-length fields; the remaining terms cover the trailing fields.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
        char *buffer;   /* output buffer */
        int offset;     /* read offset requested by the caller */
        int length;     /* number of bytes requested by the caller */
        int skip;       /* records skipped so far to reach offset */
        int len;        /* bytes written into buffer so far */
};
1877
1878 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1879 {
1880         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1881         int i;
1882
1883         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1884                 arg->skip++;
1885                 return 0;
1886         }
1887
1888         if (arg->len >= arg->length)
1889                 return 0;
1890
1891         for (i=0; i<16; i++) {
1892                 sprintf(arg->buffer + arg->len, "%02x",
1893                         rt->rt6i_dst.addr.s6_addr[i]);
1894                 arg->len += 2;
1895         }
1896         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1897                             rt->rt6i_dst.plen);
1898
1899 #ifdef CONFIG_IPV6_SUBTREES
1900         for (i=0; i<16; i++) {
1901                 sprintf(arg->buffer + arg->len, "%02x",
1902                         rt->rt6i_src.addr.s6_addr[i]);
1903                 arg->len += 2;
1904         }
1905         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1906                             rt->rt6i_src.plen);
1907 #else
1908         sprintf(arg->buffer + arg->len,
1909                 "00000000000000000000000000000000 00 ");
1910         arg->len += 36;
1911 #endif
1912
1913         if (rt->rt6i_nexthop) {
1914                 for (i=0; i<16; i++) {
1915                         sprintf(arg->buffer + arg->len, "%02x",
1916                                 rt->rt6i_nexthop->primary_key[i]);
1917                         arg->len += 2;
1918                 }
1919         } else {
1920                 sprintf(arg->buffer + arg->len,
1921                         "00000000000000000000000000000000");
1922                 arg->len += 32;
1923         }
1924         arg->len += sprintf(arg->buffer + arg->len,
1925                             " %08x %08x %08x %08x %8s\n",
1926                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1927                             rt->u.dst.__use, rt->rt6i_flags, 
1928                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1929         return 0;
1930 }
1931
1932 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1933 {
1934         struct rt6_proc_arg arg;
1935         arg.buffer = buffer;
1936         arg.offset = offset;
1937         arg.length = length;
1938         arg.skip = 0;
1939         arg.len = 0;
1940
1941         read_lock_bh(&rt6_lock);
1942         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1943         read_unlock_bh(&rt6_lock);
1944
1945         *start = buffer;
1946         if (offset)
1947                 *start += offset % RT6_INFO_LEN;
1948
1949         arg.len -= offset % RT6_INFO_LEN;
1950
1951         if (arg.len > length)
1952                 arg.len = length;
1953         if (arg.len < 0)
1954                 arg.len = 0;
1955
1956         return arg.len;
1957 }
1958
1959 extern struct rt6_statistics rt6_stats;
1960
1961 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1962 {
1963         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1964                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1965                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1966                       rt6_stats.fib_rt_cache,
1967                       atomic_read(&ip6_dst_ops.entries),
1968                       rt6_stats.fib_discarded_routes);
1969
1970         return 0;
1971 }
1972
1973 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1974 {
1975         return single_open(file, rt6_stats_seq_show, NULL);
1976 }
1977
1978 static struct file_operations rt6_stats_seq_fops = {
1979         .owner   = THIS_MODULE,
1980         .open    = rt6_stats_seq_open,
1981         .read    = seq_read,
1982         .llseek  = seq_lseek,
1983         .release = single_release,
1984 };
1985 #endif  /* CONFIG_PROC_FS */
1986
1987 #ifdef CONFIG_SYSCTL
1988
1989 static int flush_delay;
1990
1991 static
1992 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1993                               void __user *buffer, size_t *lenp, loff_t *ppos)
1994 {
1995         if (write) {
1996                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1997                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1998                 return 0;
1999         } else
2000                 return -EINVAL;
2001 }
2002
2003 ctl_table ipv6_route_table[] = {
2004         {
2005                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2006                 .procname       =       "flush",
2007                 .data           =       &flush_delay,
2008                 .maxlen         =       sizeof(int),
2009                 .mode           =       0200,
2010                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2011         },
2012         {
2013                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2014                 .procname       =       "gc_thresh",
2015                 .data           =       &ip6_dst_ops.gc_thresh,
2016                 .maxlen         =       sizeof(int),
2017                 .mode           =       0644,
2018                 .proc_handler   =       &proc_dointvec,
2019         },
2020         {
2021                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2022                 .procname       =       "max_size",
2023                 .data           =       &ip6_rt_max_size,
2024                 .maxlen         =       sizeof(int),
2025                 .mode           =       0644,
2026                 .proc_handler   =       &proc_dointvec,
2027         },
2028         {
2029                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2030                 .procname       =       "gc_min_interval",
2031                 .data           =       &ip6_rt_gc_min_interval,
2032                 .maxlen         =       sizeof(int),
2033                 .mode           =       0644,
2034                 .proc_handler   =       &proc_dointvec_jiffies,
2035                 .strategy       =       &sysctl_jiffies,
2036         },
2037         {
2038                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2039                 .procname       =       "gc_timeout",
2040                 .data           =       &ip6_rt_gc_timeout,
2041                 .maxlen         =       sizeof(int),
2042                 .mode           =       0644,
2043                 .proc_handler   =       &proc_dointvec_jiffies,
2044                 .strategy       =       &sysctl_jiffies,
2045         },
2046         {
2047                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2048                 .procname       =       "gc_interval",
2049                 .data           =       &ip6_rt_gc_interval,
2050                 .maxlen         =       sizeof(int),
2051                 .mode           =       0644,
2052                 .proc_handler   =       &proc_dointvec_jiffies,
2053                 .strategy       =       &sysctl_jiffies,
2054         },
2055         {
2056                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2057                 .procname       =       "gc_elasticity",
2058                 .data           =       &ip6_rt_gc_elasticity,
2059                 .maxlen         =       sizeof(int),
2060                 .mode           =       0644,
2061                 .proc_handler   =       &proc_dointvec_jiffies,
2062                 .strategy       =       &sysctl_jiffies,
2063         },
2064         {
2065                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2066                 .procname       =       "mtu_expires",
2067                 .data           =       &ip6_rt_mtu_expires,
2068                 .maxlen         =       sizeof(int),
2069                 .mode           =       0644,
2070                 .proc_handler   =       &proc_dointvec_jiffies,
2071                 .strategy       =       &sysctl_jiffies,
2072         },
2073         {
2074                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2075                 .procname       =       "min_adv_mss",
2076                 .data           =       &ip6_rt_min_advmss,
2077                 .maxlen         =       sizeof(int),
2078                 .mode           =       0644,
2079                 .proc_handler   =       &proc_dointvec_jiffies,
2080                 .strategy       =       &sysctl_jiffies,
2081         },
2082         {
2083                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2084                 .procname       =       "gc_min_interval_ms",
2085                 .data           =       &ip6_rt_gc_min_interval,
2086                 .maxlen         =       sizeof(int),
2087                 .mode           =       0644,
2088                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2089                 .strategy       =       &sysctl_ms_jiffies,
2090         },
2091         { .ctl_name = 0 }
2092 };
2093
2094 #endif
2095
2096 void __init ip6_route_init(void)
2097 {
2098         struct proc_dir_entry *p;
2099
2100         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2101                                                      sizeof(struct rt6_info),
2102                                                      0, SLAB_HWCACHE_ALIGN,
2103                                                      NULL, NULL);
2104         if (!ip6_dst_ops.kmem_cachep)
2105                 panic("cannot create ip6_dst_cache");
2106
2107         fib6_init();
2108 #ifdef  CONFIG_PROC_FS
2109         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2110         if (p)
2111                 p->owner = THIS_MODULE;
2112
2113         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2114 #endif
2115 #ifdef CONFIG_XFRM
2116         xfrm6_init();
2117 #endif
2118 }
2119
2120 void ip6_route_cleanup(void)
2121 {
2122 #ifdef CONFIG_PROC_FS
2123         proc_net_remove("ipv6_route");
2124         proc_net_remove("rt6_stats");
2125 #endif
2126 #ifdef CONFIG_XFRM
2127         xfrm6_fini();
2128 #endif
2129         rt6_ifdown(NULL);
2130         fib6_gc_cleanup();
2131         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2132 }