]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[IPV6]: ROUTE: Unify two code paths for pmtu disc.
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug tracing switches for this file.
 *
 * With RT6_DEBUG below 3 (the value set here) both RDBG() and RT6_TRACE()
 * expand to nothing, so their arguments are never evaluated.  At 3 or above
 * RDBG() pastes its argument directly after "printk", which means callers
 * have to pass an already parenthesised argument list, while RT6_TRACE()
 * prepends KERN_DEBUG to the format string it is given.
 */
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75
/*
 * Tunables for the IPv6 routing cache.  Values expressed with HZ are in
 * jiffies.
 */
/* ip6_dst_gc() reports pressure once the dst entry count exceeds this. */
76 static int ip6_rt_max_size = 4096;
/* ip6_dst_gc() skips a run that comes sooner than this after the previous
 * one, provided the entry count is still within ip6_rt_max_size. */
77 static int ip6_rt_gc_min_interval = HZ / 2;
/* Half of this value is loaded into ip6_dst_gc()'s expire counter once the
 * entry count has dropped below gc_thresh. */
78 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static and not used in the code visible here.
 * NOTE(review): presumably the period of the periodic fib6 GC - confirm. */
79 int ip6_rt_gc_interval = 30*HZ;
/* Shift count ip6_dst_gc() uses to decay its expire counter on each call. */
80 static int ip6_rt_gc_elasticity = 9;
/* Not used in the code visible here.
 * NOTE(review): presumably the lifetime of a learned path MTU - confirm. */
81 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss().
 * NOTE(review): the 20 and 40 look like TCP and IPv6 header sizes - confirm. */
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
/*
 * Forward declarations.  They let the ip6_dst_ops and ip6_null_entry
 * initialisers below refer to these callbacks before their definitions.
 */
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void             ip6_dst_destroy(struct dst_entry *);
88 static void             ip6_dst_ifdown(struct dst_entry *,
89                                        struct net_device *dev, int how);
90 static int               ip6_dst_gc(void);
91
92 static int              ip6_pkt_discard(struct sk_buff *skb);
93 static int              ip6_pkt_discard_out(struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
/*
 * dst operations table for IPv6 routes.  Every entry obtained through
 * ip6_dst_alloc() is allocated against this table; entry_size makes each
 * allocation large enough for a whole struct rt6_info.  The callbacks wired
 * in here are the static functions declared above.
 */
97 static struct dst_ops ip6_dst_ops = {
98         .family                 =       AF_INET6,
99         .protocol               =       __constant_htons(ETH_P_IPV6),
100         .gc                     =       ip6_dst_gc,
101         .gc_thresh              =       1024,
102         .check                  =       ip6_dst_check,
103         .destroy                =       ip6_dst_destroy,
104         .ifdown                 =       ip6_dst_ifdown,
105         .negative_advice        =       ip6_negative_advice,
106         .link_failure           =       ip6_link_failure,
107         .update_pmtu            =       ip6_rt_update_pmtu,
108         .entry_size             =       sizeof(struct rt6_info),
109 };
110
/*
 * Statically allocated "no route" sentinel.
 *
 * It is a reject route (RTF_REJECT | RTF_NONEXTHOP) bound to loopback_dev
 * whose input/output handlers discard the packet and whose dst error is
 * -ENETUNREACH.  Both reference counts start at 1.  In this file it serves
 * as the leaf of the routing table root, as the result of a strict
 * rt6_device_match() that found nothing, as the last resort of
 * rt6_best_dflt(), and as the fallback of rt6_cow() when cloning fails.
 */
111 struct rt6_info ip6_null_entry = {
112         .u = {
113                 .dst = {
114                         .__refcnt       = ATOMIC_INIT(1),
115                         .__use          = 1,
116                         .dev            = &loopback_dev,
117                         .obsolete       = -1,
118                         .error          = -ENETUNREACH,
119                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
120                         .input          = ip6_pkt_discard,
121                         .output         = ip6_pkt_discard_out,
122                         .ops            = &ip6_dst_ops,
123                         .path           = (struct dst_entry*)&ip6_null_entry,
124                 }
125         },
126         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
127         .rt6i_metric    = ~(u32) 0,
128         .rt6i_ref       = ATOMIC_INIT(1),
129 };
130
/*
 * Root node of the IPv6 FIB.  It starts out holding only the
 * ip6_null_entry sentinel as its leaf and is the tree passed to
 * fib6_lookup() and fib6_add() throughout this file.
 */
131 struct fib6_node ip6_routing_table = {
132         .leaf           = &ip6_null_entry,
133         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 };
135
136 /* Protects all the ip6 fib */
137
/*
 * In this file lookups take the lock with read_lock_bh(), while
 * ip6_ins_rt() and the ndisc_dst_gc_list insertion in ndisc_dst_alloc()
 * take it with write_lock_bh().
 */
138 DEFINE_RWLOCK(rt6_lock);
139
140
141 /* allocate dst with ip6_dst_ops */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 {
144         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145 }
146
147 static void ip6_dst_destroy(struct dst_entry *dst)
148 {
149         struct rt6_info *rt = (struct rt6_info *)dst;
150         struct inet6_dev *idev = rt->rt6i_idev;
151
152         if (idev != NULL) {
153                 rt->rt6i_idev = NULL;
154                 in6_dev_put(idev);
155         }       
156 }
157
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159                            int how)
160 {
161         struct rt6_info *rt = (struct rt6_info *)dst;
162         struct inet6_dev *idev = rt->rt6i_idev;
163
164         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166                 if (loopback_idev != NULL) {
167                         rt->rt6i_idev = loopback_idev;
168                         in6_dev_put(idev);
169                 }
170         }
171 }
172
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 {
175         return (rt->rt6i_flags & RTF_EXPIRES &&
176                 time_after(jiffies, rt->rt6i_expires));
177 }
178
179 /*
180  *      Route lookup. Any rt6_lock is implied.
181  */
182
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184                                                     int oif,
185                                                     int strict)
186 {
187         struct rt6_info *local = NULL;
188         struct rt6_info *sprt;
189
190         if (oif) {
191                 for (sprt = rt; sprt; sprt = sprt->u.next) {
192                         struct net_device *dev = sprt->rt6i_dev;
193                         if (dev->ifindex == oif)
194                                 return sprt;
195                         if (dev->flags & IFF_LOOPBACK) {
196                                 if (sprt->rt6i_idev == NULL ||
197                                     sprt->rt6i_idev->dev->ifindex != oif) {
198                                         if (strict && oif)
199                                                 continue;
200                                         if (local && (!oif || 
201                                                       local->rt6i_idev->dev->ifindex == oif))
202                                                 continue;
203                                 }
204                                 local = sprt;
205                         }
206                 }
207
208                 if (local)
209                         return local;
210
211                 if (strict)
212                         return &ip6_null_entry;
213         }
214         return rt;
215 }
216
217 /*
218  *      pointer to the last default router chosen. BH is disabled locally.
219  */
/*
 * rt6_dflt_pointer is written under rt6_dflt_lock: rt6_reset_dflt_pointer()
 * takes the lock with spin_lock_bh(), rt6_best_dflt() with plain spin_lock().
 */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 {
225         spin_lock_bh(&rt6_dflt_lock);
226         if (rt == NULL || rt == rt6_dflt_pointer) {
227                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228                 rt6_dflt_pointer = NULL;
229         }
230         spin_unlock_bh(&rt6_dflt_lock);
231 }
232
/*
 * Choose a default router from the sibling list starting at @rt.
 *
 * @rt:  first candidate route (siblings are chained through u.next)
 * @oif: requested output interface index, 0 for "any"
 *
 * Three stages are tried in order:
 *   1. score every non-expired candidate that has a neighbour entry in a
 *      usable NUD state and keep the highest score;
 *   2. if none scored and a previous choice is remembered, round-robin to
 *      the next usable route after that choice;
 *   3. if still nothing, take the first non-expired RTF_DEFAULT route on a
 *      matching interface from the routing table root's leaf list.
 * A result from stage 1 or 2 is recorded in rt6_dflt_pointer.
 *
 * Returns the selected route, or &ip6_null_entry when no default route is
 * usable; never NULL.  No reference is taken on the result.  The spinlock
 * is taken without disabling BHs, so the caller is expected to run with
 * BHs already off (ip6_route_output() calls this under read_lock_bh()).
 */
233 /* Default Router Selection (RFC 2461 6.3.6) */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235 {
236         struct rt6_info *match = NULL;
237         struct rt6_info *sprt;
238         int mpri = 0;
239
240         for (sprt = rt; sprt; sprt = sprt->u.next) {
241                 struct neighbour *neigh;
242                 int m = 0;
243
                /* +8: no interface requested, or the route is on it */
244                 if (!oif ||
245                     (sprt->rt6i_dev &&
246                      sprt->rt6i_dev->ifindex == oif))
247                         m += 8;
248
249                 if (rt6_check_expired(sprt))
250                         continue;
251
                /* +4: this is the router picked last time */
252                 if (sprt == rt6_dflt_pointer)
253                         m += 4;
254
                /* +3/+2/+1 by neighbour state; anything else is skipped */
255                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256                         read_lock_bh(&neigh->lock);
257                         switch (neigh->nud_state) {
258                         case NUD_REACHABLE:
259                                 m += 3;
260                                 break;
261
262                         case NUD_STALE:
263                         case NUD_DELAY:
264                         case NUD_PROBE:
265                                 m += 2;
266                                 break;
267
268                         case NUD_NOARP:
269                         case NUD_PERMANENT:
270                                 m += 1;
271                                 break;
272
273                         case NUD_INCOMPLETE:
274                         default:
275                                 read_unlock_bh(&neigh->lock);
276                                 continue;
277                         }
278                         read_unlock_bh(&neigh->lock);
279                 } else {
280                         continue;
281                 }
282
                /*
                 * A score of 12 or more needs both the interface bonus (8)
                 * and the previous-choice bonus (4), so it identifies the
                 * remembered router and ends the scan early.
                 */
283                 if (m > mpri || m >= 12) {
284                         match = sprt;
285                         mpri = m;
286                         if (m >= 12) {
287                                 /* we choose the last default router if it
288                                  * is in (probably) reachable state.
289                                  * If route changed, we should do pmtu
290                                  * discovery. --yoshfuji
291                                  */
292                                 break;
293                         }
294                 }
295         }
296
297         spin_lock(&rt6_dflt_lock);
298         if (!match) {
299                 /*
300                  *      No default routers are known to be reachable.
301                  *      SHOULD round robin
302                  */
303                 if (rt6_dflt_pointer) {
                        /* first pass: routes after the remembered one */
304                         for (sprt = rt6_dflt_pointer->u.next;
305                              sprt; sprt = sprt->u.next) {
306                                 if (sprt->u.dst.obsolete <= 0 &&
307                                     sprt->u.dst.error == 0 &&
308                                     !rt6_check_expired(sprt)) {
309                                         match = sprt;
310                                         break;
311                                 }
312                         }
                        /* second pass: wrap around from the list head, up to
                         * and including the remembered route itself */
313                         for (sprt = rt;
314                              !match && sprt;
315                              sprt = sprt->u.next) {
316                                 if (sprt->u.dst.obsolete <= 0 &&
317                                     sprt->u.dst.error == 0 &&
318                                     !rt6_check_expired(sprt)) {
319                                         match = sprt;
320                                         break;
321                                 }
322                                 if (sprt == rt6_dflt_pointer)
323                                         break;
324                         }
325                 }
326         }
327
328         if (match) {
329                 if (rt6_dflt_pointer != match)
330                         RT6_TRACE("changed default router: %p->%p\n",
331                                   rt6_dflt_pointer, match);
332                 rt6_dflt_pointer = match;
333         }
334         spin_unlock(&rt6_dflt_lock);
335
336         if (!match) {
337                 /*
338                  * Last Resort: if no default routers found, 
339                  * use addrconf default route.
340                  * We don't record this route.
341                  */
342                 for (sprt = ip6_routing_table.leaf;
343                      sprt; sprt = sprt->u.next) {
344                         if (!rt6_check_expired(sprt) &&
345                             (sprt->rt6i_flags & RTF_DEFAULT) &&
346                             (!oif ||
347                              (sprt->rt6i_dev &&
348                               sprt->rt6i_dev->ifindex == oif))) {
349                                 match = sprt;
350                                 break;
351                         }
352                 }
353                 if (!match) {
354                         /* no default route.  give up. */
355                         match = &ip6_null_entry;
356                 }
357         }
358
359         return match;
360 }
361
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363                             int oif, int strict)
364 {
365         struct fib6_node *fn;
366         struct rt6_info *rt;
367
368         read_lock_bh(&rt6_lock);
369         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370         rt = rt6_device_match(fn->leaf, oif, strict);
371         dst_hold(&rt->u.dst);
372         rt->u.dst.__use++;
373         read_unlock_bh(&rt6_lock);
374
375         rt->u.dst.lastuse = jiffies;
376         if (rt->u.dst.error == 0)
377                 return rt;
378         dst_release(&rt->u.dst);
379         return NULL;
380 }
381
382 /* ip6_ins_rt is called with FREE rt6_lock.
383    It takes new route entry, the addition fails by any reason the
384    route is freed. In any case, if caller does not hold it, it may
385    be destroyed.
386  */
387
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389                 void *_rtattr, struct netlink_skb_parms *req)
390 {
391         int err;
392
393         write_lock_bh(&rt6_lock);
394         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395         write_unlock_bh(&rt6_lock);
396
397         return err;
398 }
399
400 /* No rt6_lock! If COW failed, the function returns dead route entry
401    with dst->error set to errno value.
402  */
403
404 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405                                       struct in6_addr *saddr)
406 {
407         struct rt6_info *rt;
408
409         /*
410          *      Clone the route.
411          */
412
413         rt = ip6_rt_copy(ort);
414
415         if (rt) {
416                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417                         if (rt->rt6i_dst.plen != 128 &&
418                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419                                 rt->rt6i_flags |= RTF_ANYCAST;
420                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
421                 }
422
423                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424                 rt->rt6i_dst.plen = 128;
425                 rt->rt6i_flags |= RTF_CACHE;
426                 rt->u.dst.flags |= DST_HOST;
427
428 #ifdef CONFIG_IPV6_SUBTREES
429                 if (rt->rt6i_src.plen && saddr) {
430                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431                         rt->rt6i_src.plen = 128;
432                 }
433 #endif
434
435                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
437         }
438
439         return rt;
440 }
441
442 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
444 {
445         struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
446         int err;
447
448         if (!rt) {
449                 dst_hold(&ip6_null_entry.u.dst);
450                 return &ip6_null_entry;
451         }
452
453         dst_hold(&rt->u.dst);
454
455         err = ip6_ins_rt(rt, NULL, NULL, req);
456         if (err)
457                 rt->u.dst.error = err;
458
459         return rt;
460 }
461
462 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
463 {
464         struct rt6_info *rt = ip6_rt_copy(ort);
465         if (rt) {
466                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
467                 rt->rt6i_dst.plen = 128;
468                 rt->rt6i_flags |= RTF_CACHE;
469                 if (rt->rt6i_flags & RTF_REJECT)
470                         rt->u.dst.error = ort->u.dst.error;
471                 rt->u.dst.flags |= DST_HOST;
472                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
473         }
474         return rt;
475 }
476
/*
 * BACKTRACK() - retry a strict lookup further up the tree.
 *
 * Only usable inside a function that provides the locals `rt`, `fn` and
 * `strict` and the labels `restart` and `out`.  When the current result is
 * ip6_null_entry and the lookup is strict, the macro walks fn->parent
 * upwards: at a node flagged RTN_ROOT it takes a reference on rt (still
 * ip6_null_entry) and jumps to `out`; at a node flagged RTN_RTINFO it
 * jumps to `restart` so the caller re-examines that node's leaf.  If the
 * parent chain ends without hitting either, execution falls through.
 */
477 #define BACKTRACK() \
478 if (rt == &ip6_null_entry && strict) { \
479        while ((fn = fn->parent) != NULL) { \
480                 if (fn->fn_flags & RTN_ROOT) { \
481                         dst_hold(&rt->u.dst); \
482                         goto out; \
483                 } \
484                 if (fn->fn_flags & RTN_RTINFO) \
485                         goto restart; \
486         } \
487 }
488
489
/*
 * Attach a route to an incoming packet.
 *
 * @skb: packet whose IPv6 header addresses and receiving device drive the
 *       lookup; on return skb->dst points at the chosen route.
 *
 * The lookup is strict (see rt6_device_match()) for multicast and
 * link-local destinations.  A reference is taken on whichever route ends
 * up in skb->dst, and its lastuse and __use fields are updated.  The
 * result may be ip6_null_entry, or a clone whose dst.error is set.
 */
490 void ip6_route_input(struct sk_buff *skb)
491 {
492         struct fib6_node *fn;
493         struct rt6_info *rt;
494         int strict;
        /* bound on the number of clone-and-insert tries (see below) */
495         int attempts = 3;
496
497         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
498
499 relookup:
500         read_lock_bh(&rt6_lock);
501
502         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
503                          &skb->nh.ipv6h->saddr);
504
        /* BACKTRACK() jumps back here after moving fn up the tree */
505 restart:
506         rt = fn->leaf;
507
        /* leaf is already a cached route: match the device and use it */
508         if ((rt->rt6i_flags & RTF_CACHE)) {
509                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
510                 BACKTRACK();
511                 dst_hold(&rt->u.dst);
512                 goto out;
513         }
514
515         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
516         BACKTRACK();
517
        /*
         * The route has no nexthop yet and is not marked RTF_NONEXTHOP:
         * clone it for this destination.  rt6_cow() must run without
         * rt6_lock, so pin the route and drop the lock first.
         */
518         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
519                 struct rt6_info *nrt;
520                 dst_hold(&rt->u.dst);
521                 read_unlock_bh(&rt6_lock);
522
523                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
524                               &skb->nh.ipv6h->saddr,
525                               &NETLINK_CB(skb));
526
527                 dst_release(&rt->u.dst);
528                 rt = nrt;
529
                /* anything but -EEXIST, or retries used up: take it as is
                 * (the lock is not held here, hence out2) */
530                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
531                         goto out2;
532
533                 /* Race condition! In the gap, when rt6_lock was
534                    released someone could insert this route.  Relookup.
535                 */
536                 dst_release(&rt->u.dst);
537                 goto relookup;
538         }
539         dst_hold(&rt->u.dst);
540
541 out:
542         read_unlock_bh(&rt6_lock);
543 out2:
544         rt->u.dst.lastuse = jiffies;
545         rt->u.dst.__use++;
546         skb->dst = (struct dst_entry *) rt;
547 }
548
/*
 * Find the route for an outgoing flow.
 *
 * @sk: socket on whose behalf the lookup is made (not used in this body)
 * @fl: flow description; fl6_dst, fl6_src and oif are consulted
 *
 * The lookup is strict (see rt6_device_match()) for multicast and
 * link-local destinations.  If the leaf is a default route whose metric
 * is at least IP6_RT_PRIO_ADDRCONF, the router is chosen by
 * rt6_best_dflt(); a default route with a lower metric is used as found,
 * without device matching.
 *
 * Returns the dst of the chosen route with one reference held and with
 * lastuse/__use updated; never NULL.  The result may be ip6_null_entry,
 * or a clone whose dst.error is set.
 */
549 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
550 {
551         struct fib6_node *fn;
552         struct rt6_info *rt;
553         int strict;
        /* bound on the number of clone-and-insert tries (see below) */
554         int attempts = 3;
555
556         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
557
558 relookup:
559         read_lock_bh(&rt6_lock);
560
561         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
562
        /* BACKTRACK() jumps back here after moving fn up the tree */
563 restart:
564         rt = fn->leaf;
565
        /* leaf is already a cached route: match the device and use it */
566         if ((rt->rt6i_flags & RTF_CACHE)) {
567                 rt = rt6_device_match(rt, fl->oif, strict);
568                 BACKTRACK();
569                 dst_hold(&rt->u.dst);
570                 goto out;
571         }
572         if (rt->rt6i_flags & RTF_DEFAULT) {
573                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
574                         rt = rt6_best_dflt(rt, fl->oif);
575         } else {
576                 rt = rt6_device_match(rt, fl->oif, strict);
577                 BACKTRACK();
578         }
579
        /*
         * The route has no nexthop yet and is not marked RTF_NONEXTHOP:
         * clone it for this destination.  rt6_cow() must run without
         * rt6_lock, so pin the route and drop the lock first.
         */
580         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
581                 struct rt6_info *nrt;
582                 dst_hold(&rt->u.dst);
583                 read_unlock_bh(&rt6_lock);
584
585                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
586
587                 dst_release(&rt->u.dst);
588                 rt = nrt;
589
                /* anything but -EEXIST, or retries used up: take it as is
                 * (the lock is not held here, hence out2) */
590                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
591                         goto out2;
592
593                 /* Race condition! In the gap, when rt6_lock was
594                    released someone could insert this route.  Relookup.
595                 */
596                 dst_release(&rt->u.dst);
597                 goto relookup;
598         }
599         dst_hold(&rt->u.dst);
600
601 out:
602         read_unlock_bh(&rt6_lock);
603 out2:
604         rt->u.dst.lastuse = jiffies;
605         rt->u.dst.__use++;
606         return &rt->u.dst;
607 }
608
609
610 /*
611  *      Destination cache support functions
612  */
613
614 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
615 {
616         struct rt6_info *rt;
617
618         rt = (struct rt6_info *) dst;
619
620         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
621                 return dst;
622
623         return NULL;
624 }
625
626 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
627 {
628         struct rt6_info *rt = (struct rt6_info *) dst;
629
630         if (rt) {
631                 if (rt->rt6i_flags & RTF_CACHE)
632                         ip6_del_rt(rt, NULL, NULL, NULL);
633                 else
634                         dst_release(dst);
635         }
636         return NULL;
637 }
638
639 static void ip6_link_failure(struct sk_buff *skb)
640 {
641         struct rt6_info *rt;
642
643         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
644
645         rt = (struct rt6_info *) skb->dst;
646         if (rt) {
647                 if (rt->rt6i_flags&RTF_CACHE) {
648                         dst_set_expires(&rt->u.dst, 0);
649                         rt->rt6i_flags |= RTF_EXPIRES;
650                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
651                         rt->rt6i_node->fn_sernum = -1;
652         }
653 }
654
655 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
656 {
657         struct rt6_info *rt6 = (struct rt6_info*)dst;
658
659         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
660                 rt6->rt6i_flags |= RTF_MODIFIED;
661                 if (mtu < IPV6_MIN_MTU) {
662                         mtu = IPV6_MIN_MTU;
663                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
664                 }
665                 dst->metrics[RTAX_MTU-1] = mtu;
666         }
667 }
668
669 /* Protected by rt6_lock.  */
/* Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones. */
670 static struct dst_entry *ndisc_dst_gc_list;
/* Defined further down; declared here for ndisc_dst_alloc(). */
671 static int ipv6_get_mtu(struct net_device *dev);
672
673 static inline unsigned int ipv6_advmss(unsigned int mtu)
674 {
675         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
676
677         if (mtu < ip6_rt_min_advmss)
678                 mtu = ip6_rt_min_advmss;
679
680         /*
681          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
682          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
683          * IPV6_MAXPLEN is also valid and means: "any MSS, 
684          * rely only on pmtu discovery"
685          */
686         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
687                 mtu = IPV6_MAXPLEN;
688         return mtu;
689 }
690
691 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
692                                   struct neighbour *neigh,
693                                   struct in6_addr *addr,
694                                   int (*output)(struct sk_buff *))
695 {
696         struct rt6_info *rt;
697         struct inet6_dev *idev = in6_dev_get(dev);
698
699         if (unlikely(idev == NULL))
700                 return NULL;
701
702         rt = ip6_dst_alloc();
703         if (unlikely(rt == NULL)) {
704                 in6_dev_put(idev);
705                 goto out;
706         }
707
708         dev_hold(dev);
709         if (neigh)
710                 neigh_hold(neigh);
711         else
712                 neigh = ndisc_get_neigh(dev, addr);
713
714         rt->rt6i_dev      = dev;
715         rt->rt6i_idev     = idev;
716         rt->rt6i_nexthop  = neigh;
717         atomic_set(&rt->u.dst.__refcnt, 1);
718         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
719         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
720         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
721         rt->u.dst.output  = output;
722
723 #if 0   /* there's no chance to use these for ndisc */
724         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
725                                 ? DST_HOST 
726                                 : 0;
727         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
728         rt->rt6i_dst.plen = 128;
729 #endif
730
731         write_lock_bh(&rt6_lock);
732         rt->u.dst.next = ndisc_dst_gc_list;
733         ndisc_dst_gc_list = &rt->u.dst;
734         write_unlock_bh(&rt6_lock);
735
736         fib6_force_start_gc();
737
738 out:
739         return (struct dst_entry *)rt;
740 }
741
742 int ndisc_dst_gc(int *more)
743 {
744         struct dst_entry *dst, *next, **pprev;
745         int freed;
746
747         next = NULL;
748         pprev = &ndisc_dst_gc_list;
749         freed = 0;
750         while ((dst = *pprev) != NULL) {
751                 if (!atomic_read(&dst->__refcnt)) {
752                         *pprev = dst->next;
753                         dst_free(dst);
754                         freed++;
755                 } else {
756                         pprev = &dst->next;
757                         (*more)++;
758                 }
759         }
760
761         return freed;
762 }
763
764 static int ip6_dst_gc(void)
765 {
766         static unsigned expire = 30*HZ;
767         static unsigned long last_gc;
768         unsigned long now = jiffies;
769
770         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
771             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
772                 goto out;
773
774         expire++;
775         fib6_run_gc(expire);
776         last_gc = now;
777         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
778                 expire = ip6_rt_gc_timeout>>1;
779
780 out:
781         expire -= expire>>ip6_rt_gc_elasticity;
782         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
783 }
784
785 /* Clean host part of a prefix. Not necessary in radix tree,
786    but results in cleaner routing tables.
787
788    Remove it only when all the things will work!
789  */
790
791 static int ipv6_get_mtu(struct net_device *dev)
792 {
793         int mtu = IPV6_MIN_MTU;
794         struct inet6_dev *idev;
795
796         idev = in6_dev_get(dev);
797         if (idev) {
798                 mtu = idev->cnf.mtu6;
799                 in6_dev_put(idev);
800         }
801         return mtu;
802 }
803
804 int ipv6_get_hoplimit(struct net_device *dev)
805 {
806         int hoplimit = ipv6_devconf.hop_limit;
807         struct inet6_dev *idev;
808
809         idev = in6_dev_get(dev);
810         if (idev) {
811                 hoplimit = idev->cnf.hop_limit;
812                 in6_dev_put(idev);
813         }
814         return hoplimit;
815 }
816
817 /*
818  *
819  */
820
821 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
822                 void *_rtattr, struct netlink_skb_parms *req)
823 {
824         int err;
825         struct rtmsg *r;
826         struct rtattr **rta;
827         struct rt6_info *rt = NULL;
828         struct net_device *dev = NULL;
829         struct inet6_dev *idev = NULL;
830         int addr_type;
831
832         rta = (struct rtattr **) _rtattr;
833
834         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
835                 return -EINVAL;
836 #ifndef CONFIG_IPV6_SUBTREES
837         if (rtmsg->rtmsg_src_len)
838                 return -EINVAL;
839 #endif
840         if (rtmsg->rtmsg_ifindex) {
841                 err = -ENODEV;
842                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
843                 if (!dev)
844                         goto out;
845                 idev = in6_dev_get(dev);
846                 if (!idev)
847                         goto out;
848         }
849
850         if (rtmsg->rtmsg_metric == 0)
851                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
852
853         rt = ip6_dst_alloc();
854
855         if (rt == NULL) {
856                 err = -ENOMEM;
857                 goto out;
858         }
859
860         rt->u.dst.obsolete = -1;
861         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
862         if (nlh && (r = NLMSG_DATA(nlh))) {
863                 rt->rt6i_protocol = r->rtm_protocol;
864         } else {
865                 rt->rt6i_protocol = RTPROT_BOOT;
866         }
867
868         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
869
870         if (addr_type & IPV6_ADDR_MULTICAST)
871                 rt->u.dst.input = ip6_mc_input;
872         else
873                 rt->u.dst.input = ip6_forward;
874
875         rt->u.dst.output = ip6_output;
876
877         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
878                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
879         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
880         if (rt->rt6i_dst.plen == 128)
881                rt->u.dst.flags = DST_HOST;
882
883 #ifdef CONFIG_IPV6_SUBTREES
884         ipv6_addr_prefix(&rt->rt6i_src.addr, 
885                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
886         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
887 #endif
888
889         rt->rt6i_metric = rtmsg->rtmsg_metric;
890
891         /* We cannot add true routes via loopback here,
892            they would result in kernel looping; promote them to reject routes
893          */
894         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
895             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
896                 /* hold loopback dev/idev if we haven't done so. */
897                 if (dev != &loopback_dev) {
898                         if (dev) {
899                                 dev_put(dev);
900                                 in6_dev_put(idev);
901                         }
902                         dev = &loopback_dev;
903                         dev_hold(dev);
904                         idev = in6_dev_get(dev);
905                         if (!idev) {
906                                 err = -ENODEV;
907                                 goto out;
908                         }
909                 }
910                 rt->u.dst.output = ip6_pkt_discard_out;
911                 rt->u.dst.input = ip6_pkt_discard;
912                 rt->u.dst.error = -ENETUNREACH;
913                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
914                 goto install_route;
915         }
916
917         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
918                 struct in6_addr *gw_addr;
919                 int gwa_type;
920
921                 gw_addr = &rtmsg->rtmsg_gateway;
922                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
923                 gwa_type = ipv6_addr_type(gw_addr);
924
925                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
926                         struct rt6_info *grt;
927
928                         /* IPv6 strictly inhibits using not link-local
929                            addresses as nexthop address.
930                            Otherwise, router will not able to send redirects.
931                            It is very good, but in some (rare!) circumstances
932                            (SIT, PtP, NBMA NOARP links) it is handy to allow
933                            some exceptions. --ANK
934                          */
935                         err = -EINVAL;
936                         if (!(gwa_type&IPV6_ADDR_UNICAST))
937                                 goto out;
938
939                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
940
941                         err = -EHOSTUNREACH;
942                         if (grt == NULL)
943                                 goto out;
944                         if (dev) {
945                                 if (dev != grt->rt6i_dev) {
946                                         dst_release(&grt->u.dst);
947                                         goto out;
948                                 }
949                         } else {
950                                 dev = grt->rt6i_dev;
951                                 idev = grt->rt6i_idev;
952                                 dev_hold(dev);
953                                 in6_dev_hold(grt->rt6i_idev);
954                         }
955                         if (!(grt->rt6i_flags&RTF_GATEWAY))
956                                 err = 0;
957                         dst_release(&grt->u.dst);
958
959                         if (err)
960                                 goto out;
961                 }
962                 err = -EINVAL;
963                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
964                         goto out;
965         }
966
967         err = -ENODEV;
968         if (dev == NULL)
969                 goto out;
970
971         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
972                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
973                 if (IS_ERR(rt->rt6i_nexthop)) {
974                         err = PTR_ERR(rt->rt6i_nexthop);
975                         rt->rt6i_nexthop = NULL;
976                         goto out;
977                 }
978         }
979
980         rt->rt6i_flags = rtmsg->rtmsg_flags;
981
982 install_route:
983         if (rta && rta[RTA_METRICS-1]) {
984                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
985                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
986
987                 while (RTA_OK(attr, attrlen)) {
988                         unsigned flavor = attr->rta_type;
989                         if (flavor) {
990                                 if (flavor > RTAX_MAX) {
991                                         err = -EINVAL;
992                                         goto out;
993                                 }
994                                 rt->u.dst.metrics[flavor-1] =
995                                         *(u32 *)RTA_DATA(attr);
996                         }
997                         attr = RTA_NEXT(attr, attrlen);
998                 }
999         }
1000
1001         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1002                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1003         if (!rt->u.dst.metrics[RTAX_MTU-1])
1004                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1005         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1006                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1007         rt->u.dst.dev = dev;
1008         rt->rt6i_idev = idev;
1009         return ip6_ins_rt(rt, nlh, _rtattr, req);
1010
1011 out:
1012         if (dev)
1013                 dev_put(dev);
1014         if (idev)
1015                 in6_dev_put(idev);
1016         if (rt)
1017                 dst_free((struct dst_entry *) rt);
1018         return err;
1019 }
1020
1021 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1022 {
1023         int err;
1024
1025         write_lock_bh(&rt6_lock);
1026
1027         rt6_reset_dflt_pointer(NULL);
1028
1029         err = fib6_del(rt, nlh, _rtattr, req);
1030         dst_release(&rt->u.dst);
1031
1032         write_unlock_bh(&rt6_lock);
1033
1034         return err;
1035 }
1036
1037 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1038 {
1039         struct fib6_node *fn;
1040         struct rt6_info *rt;
1041         int err = -ESRCH;
1042
1043         read_lock_bh(&rt6_lock);
1044
1045         fn = fib6_locate(&ip6_routing_table,
1046                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1047                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1048         
1049         if (fn) {
1050                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1051                         if (rtmsg->rtmsg_ifindex &&
1052                             (rt->rt6i_dev == NULL ||
1053                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1054                                 continue;
1055                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1056                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1057                                 continue;
1058                         if (rtmsg->rtmsg_metric &&
1059                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1060                                 continue;
1061                         dst_hold(&rt->u.dst);
1062                         read_unlock_bh(&rt6_lock);
1063
1064                         return ip6_del_rt(rt, nlh, _rtattr, req);
1065                 }
1066         }
1067         read_unlock_bh(&rt6_lock);
1068
1069         return err;
1070 }
1071
1072 /*
1073  *      Handle redirects
1074  */
/*
 * rt6_redirect - act on a redirect for @dest announced by @saddr.
 * @dest:    destination the redirect applies to
 * @saddr:   address the redirect came from; must be the gateway of the
 *           current route (or of some route on the root leaf list when the
 *           current route is RTF_DEFAULT)
 * @neigh:   neighbour entry for the new next hop
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the destination is itself the new next hop; the
 *           new route then has RTF_GATEWAY cleared
 *
 * On acceptance a copy of the current route is turned into a /128
 * RTF_DYNAMIC|RTF_CACHE host route through @neigh and inserted.  If the
 * route that was looked up was itself an RTF_CACHE entry it is deleted.
 * Every rejection path falls through to "out", which drops the reference
 * held on the looked-up route.
 */
1075 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1076                   struct neighbour *neigh, u8 *lladdr, int on_link)
1077 {
1078         struct rt6_info *rt, *nrt;
1079
1080         /* Locate old route to this destination. */
1081         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1082
1083         if (rt == NULL)
1084                 return;
1085
             /* The redirect must concern the device the current route uses. */
1086         if (neigh->dev != rt->rt6i_dev)
1087                 goto out;
1088
1089         /*
1090          * Current route is on-link; redirect is always invalid.
1091          * 
1092          * Seems, previous statement is not true. It could
1093          * be node, which looks for us as on-link (f.e. proxy ndisc)
1094          * But then router serving it might decide, that we should
1095          * know truth 8)8) --ANK (980726).
1096          */
1097         if (!(rt->rt6i_flags&RTF_GATEWAY))
1098                 goto out;
1099
1100         /*
1101          *      RFC 2461 specifies that redirects should only be
1102          *      accepted if they come from the nexthop to the target.
1103          *      Due to the way default routers are chosen, this notion
1104          *      is a bit fuzzy and one might need to check all default
1105          *      routers.
1106          */
1107         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1108                 if (rt->rt6i_flags & RTF_DEFAULT) {
1109                         struct rt6_info *rt1;
1110
1111                         read_lock(&rt6_lock);
1112                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1113                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
                                             /*
                                              * Switch to the route whose gateway sent the
                                              * redirect: hold it before releasing the one
                                              * we looked up, so "rt" is always referenced.
                                              */
1114                                         dst_hold(&rt1->u.dst);
1115                                         dst_release(&rt->u.dst);
1116                                         read_unlock(&rt6_lock);
1117                                         rt = rt1;
1118                                         goto source_ok;
1119                                 }
1120                         }
1121                         read_unlock(&rt6_lock);
1122                 }
1123                 if (net_ratelimit())
1124                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1125                                "for redirect target\n");
1126                 goto out;
1127         }
1128
1129 source_ok:
1130
1131         /*
1132          *      We have finally decided to accept it.
1133          */
1134
1135         neigh_update(neigh, lladdr, NUD_STALE, 
1136                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1137                      NEIGH_UPDATE_F_OVERRIDE|
1138                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1139                                      NEIGH_UPDATE_F_ISROUTER))
1140                      );
1141
1142         /*
1143          * Redirect received -> path was valid.
1144          * Look, redirects are sent only in response to data packets,
1145          * so that this nexthop apparently is reachable. --ANK
1146          */
1147         dst_confirm(&rt->u.dst);
1148
1149         /* Duplicate redirect: silently ignore. */
1150         if (neigh == rt->u.dst.neighbour)
1151                 goto out;
1152
1153         nrt = ip6_rt_copy(rt);
1154         if (nrt == NULL)
1155                 goto out;
1156
             /* The copy's flags are replaced wholesale, not inherited from rt. */
1157         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1158         if (on_link)
1159                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1160
             /* The new entry is a host (/128) route for exactly this destination. */
1161         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1162         nrt->rt6i_dst.plen = 128;
1163         nrt->u.dst.flags |= DST_HOST;
1164
1165         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1166         nrt->rt6i_nexthop = neigh_clone(neigh);
1167         /* Reset pmtu, it may be better */
1168         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1169         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1170
             /*
              * NOTE(review): nrt is not freed here when insertion fails;
              * presumably ip6_ins_rt() disposes of it on error - confirm.
              */
1171         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1172                 goto out;
1173
             /*
              * ip6_del_rt() calls dst_release() on the route it is given, so the
              * reference held on rt is consumed there and "out" must be skipped.
              */
1174         if (rt->rt6i_flags&RTF_CACHE) {
1175                 ip6_del_rt(rt, NULL, NULL, NULL);
1176                 return;
1177         }
1178
1179 out:
1180         dst_release(&rt->u.dst);
1181         return;
1182 }
1183
1184 /*
1185  *      Handle ICMP "packet too big" messages
1186  *      i.e. Path MTU discovery
1187  */
1188
1189 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1190                         struct net_device *dev, u32 pmtu)
1191 {
1192         struct rt6_info *rt, *nrt;
1193         int allfrag = 0;
1194
1195         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1196         if (rt == NULL)
1197                 return;
1198
1199         if (pmtu >= dst_mtu(&rt->u.dst))
1200                 goto out;
1201
1202         if (pmtu < IPV6_MIN_MTU) {
1203                 /*
1204                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1205                  * MTU (1280) and a fragment header should always be included
1206                  * after a node receiving Too Big message reporting PMTU is
1207                  * less than the IPv6 Minimum Link MTU.
1208                  */
1209                 pmtu = IPV6_MIN_MTU;
1210                 allfrag = 1;
1211         }
1212
1213         /* New mtu received -> path was valid.
1214            They are sent only in response to data packets,
1215            so that this nexthop apparently is reachable. --ANK
1216          */
1217         dst_confirm(&rt->u.dst);
1218
1219         /* Host route. If it is static, it would be better
1220            not to override it, but add new one, so that
1221            when cache entry will expire old pmtu
1222            would return automatically.
1223          */
1224         if (rt->rt6i_flags & RTF_CACHE) {
1225                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1226                 if (allfrag)
1227                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1228                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1229                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1230                 goto out;
1231         }
1232
1233         /* Network route.
1234            Two cases are possible:
1235            1. It is connected route. Action: COW
1236            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1237          */
1238         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1239                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1240         else
1241                 nrt = rt6_alloc_clone(rt, daddr);
1242
1243         if (nrt) {
1244                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1245                 if (allfrag)
1246                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1247
1248                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1249                  * happened within 5 mins, the recommended timer is 10 mins.
1250                  * Here this route expiration time is set to ip6_rt_mtu_expires
1251                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1252                  * and detecting PMTU increase will be automatically happened.
1253                  */
1254                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1255                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1256
1257                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1258         }
1259 out:
1260         dst_release(&rt->u.dst);
1261 }
1262
1263 /*
1264  *      Misc support functions
1265  */
1266
1267 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1268 {
1269         struct rt6_info *rt = ip6_dst_alloc();
1270
1271         if (rt) {
1272                 rt->u.dst.input = ort->u.dst.input;
1273                 rt->u.dst.output = ort->u.dst.output;
1274
1275                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1276                 rt->u.dst.dev = ort->u.dst.dev;
1277                 if (rt->u.dst.dev)
1278                         dev_hold(rt->u.dst.dev);
1279                 rt->rt6i_idev = ort->rt6i_idev;
1280                 if (rt->rt6i_idev)
1281                         in6_dev_hold(rt->rt6i_idev);
1282                 rt->u.dst.lastuse = jiffies;
1283                 rt->rt6i_expires = 0;
1284
1285                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1286                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1287                 rt->rt6i_metric = 0;
1288
1289                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1290 #ifdef CONFIG_IPV6_SUBTREES
1291                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1292 #endif
1293         }
1294         return rt;
1295 }
1296
1297 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1298 {       
1299         struct rt6_info *rt;
1300         struct fib6_node *fn;
1301
1302         fn = &ip6_routing_table;
1303
1304         write_lock_bh(&rt6_lock);
1305         for (rt = fn->leaf; rt; rt=rt->u.next) {
1306                 if (dev == rt->rt6i_dev &&
1307                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1308                         break;
1309         }
1310         if (rt)
1311                 dst_hold(&rt->u.dst);
1312         write_unlock_bh(&rt6_lock);
1313         return rt;
1314 }
1315
1316 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1317                                      struct net_device *dev)
1318 {
1319         struct in6_rtmsg rtmsg;
1320
1321         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1322         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1323         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1324         rtmsg.rtmsg_metric = 1024;
1325         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1326
1327         rtmsg.rtmsg_ifindex = dev->ifindex;
1328
1329         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1330         return rt6_get_dflt_router(gwaddr, dev);
1331 }
1332
1333 void rt6_purge_dflt_routers(void)
1334 {
1335         struct rt6_info *rt;
1336
1337 restart:
1338         read_lock_bh(&rt6_lock);
1339         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1340                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1341                         dst_hold(&rt->u.dst);
1342
1343                         rt6_reset_dflt_pointer(NULL);
1344
1345                         read_unlock_bh(&rt6_lock);
1346
1347                         ip6_del_rt(rt, NULL, NULL, NULL);
1348
1349                         goto restart;
1350                 }
1351         }
1352         read_unlock_bh(&rt6_lock);
1353 }
1354
1355 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1356 {
1357         struct in6_rtmsg rtmsg;
1358         int err;
1359
1360         switch(cmd) {
1361         case SIOCADDRT:         /* Add a route */
1362         case SIOCDELRT:         /* Delete a route */
1363                 if (!capable(CAP_NET_ADMIN))
1364                         return -EPERM;
1365                 err = copy_from_user(&rtmsg, arg,
1366                                      sizeof(struct in6_rtmsg));
1367                 if (err)
1368                         return -EFAULT;
1369                         
1370                 rtnl_lock();
1371                 switch (cmd) {
1372                 case SIOCADDRT:
1373                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1374                         break;
1375                 case SIOCDELRT:
1376                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1377                         break;
1378                 default:
1379                         err = -EINVAL;
1380                 }
1381                 rtnl_unlock();
1382
1383                 return err;
1384         };
1385
1386         return -EINVAL;
1387 }
1388
1389 /*
1390  *      Drop the packet on the floor
1391  */
1392
1393 static int ip6_pkt_discard(struct sk_buff *skb)
1394 {
1395         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1396         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1397         kfree_skb(skb);
1398         return 0;
1399 }
1400
1401 static int ip6_pkt_discard_out(struct sk_buff *skb)
1402 {
1403         skb->dev = skb->dst->dev;
1404         return ip6_pkt_discard(skb);
1405 }
1406
1407 /*
1408  *      Allocate a dst for local (unicast / anycast) address.
1409  */
1410
1411 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1412                                     const struct in6_addr *addr,
1413                                     int anycast)
1414 {
1415         struct rt6_info *rt = ip6_dst_alloc();
1416
1417         if (rt == NULL)
1418                 return ERR_PTR(-ENOMEM);
1419
1420         dev_hold(&loopback_dev);
1421         in6_dev_hold(idev);
1422
1423         rt->u.dst.flags = DST_HOST;
1424         rt->u.dst.input = ip6_input;
1425         rt->u.dst.output = ip6_output;
1426         rt->rt6i_dev = &loopback_dev;
1427         rt->rt6i_idev = idev;
1428         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1429         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1430         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1431         rt->u.dst.obsolete = -1;
1432
1433         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1434         if (anycast)
1435                 rt->rt6i_flags |= RTF_ANYCAST;
1436         else
1437                 rt->rt6i_flags |= RTF_LOCAL;
1438         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1439         if (rt->rt6i_nexthop == NULL) {
1440                 dst_free((struct dst_entry *) rt);
1441                 return ERR_PTR(-ENOMEM);
1442         }
1443
1444         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1445         rt->rt6i_dst.plen = 128;
1446
1447         atomic_set(&rt->u.dst.__refcnt, 1);
1448
1449         return rt;
1450 }
1451
1452 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1453 {
1454         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1455             rt != &ip6_null_entry) {
1456                 RT6_TRACE("deleted by ifdown %p\n", rt);
1457                 return -1;
1458         }
1459         return 0;
1460 }
1461
1462 void rt6_ifdown(struct net_device *dev)
1463 {
1464         write_lock_bh(&rt6_lock);
1465         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1466         write_unlock_bh(&rt6_lock);
1467 }
1468
/*
 * Argument block that rt6_mtu_change() passes through fib6_clean_tree()
 * to rt6_mtu_change_route().
 */
1469 struct rt6_mtu_change_arg
1470 {
1471         struct net_device *dev;         /* device compared against each route's rt6i_dev */
1472         unsigned mtu;                   /* new MTU value to apply */
1473 };
1474
1475 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1476 {
1477         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1478         struct inet6_dev *idev;
1479
1480         /* In IPv6 pmtu discovery is not optional,
1481            so that RTAX_MTU lock cannot disable it.
1482            We still use this lock to block changes
1483            caused by addrconf/ndisc.
1484         */
1485
1486         idev = __in6_dev_get(arg->dev);
1487         if (idev == NULL)
1488                 return 0;
1489
1490         /* For administrative MTU increase, there is no way to discover
1491            IPv6 PMTU increase, so PMTU increase should be updated here.
1492            Since RFC 1981 doesn't include administrative MTU increase
1493            update PMTU increase is a MUST. (i.e. jumbo frame)
1494          */
1495         /*
1496            If new MTU is less than route PMTU, this new MTU will be the
1497            lowest MTU in the path, update the route PMTU to reflect PMTU
1498            decreases; if new MTU is greater than route PMTU, and the
1499            old MTU is the lowest MTU in the path, update the route PMTU
1500            to reflect the increase. In this case if the other nodes' MTU
1501            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1502            PMTU discouvery.
1503          */
1504         if (rt->rt6i_dev == arg->dev &&
1505             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1506             (dst_mtu(&rt->u.dst) > arg->mtu ||
1507              (dst_mtu(&rt->u.dst) < arg->mtu &&
1508               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1509                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1510         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1511         return 0;
1512 }
1513
1514 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1515 {
1516         struct rt6_mtu_change_arg arg;
1517
1518         arg.dev = dev;
1519         arg.mtu = mtu;
1520         read_lock_bh(&rt6_lock);
1521         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1522         read_unlock_bh(&rt6_lock);
1523 }
1524
1525 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1526                               struct in6_rtmsg *rtmsg)
1527 {
1528         memset(rtmsg, 0, sizeof(*rtmsg));
1529
1530         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1531         rtmsg->rtmsg_src_len = r->rtm_src_len;
1532         rtmsg->rtmsg_flags = RTF_UP;
1533         if (r->rtm_type == RTN_UNREACHABLE)
1534                 rtmsg->rtmsg_flags |= RTF_REJECT;
1535
1536         if (rta[RTA_GATEWAY-1]) {
1537                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1538                         return -EINVAL;
1539                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1540                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1541         }
1542         if (rta[RTA_DST-1]) {
1543                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1544                         return -EINVAL;
1545                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1546         }
1547         if (rta[RTA_SRC-1]) {
1548                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1549                         return -EINVAL;
1550                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1551         }
1552         if (rta[RTA_OIF-1]) {
1553                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1554                         return -EINVAL;
1555                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1556         }
1557         if (rta[RTA_PRIORITY-1]) {
1558                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1559                         return -EINVAL;
1560                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1561         }
1562         return 0;
1563 }
1564
1565 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1566 {
1567         struct rtmsg *r = NLMSG_DATA(nlh);
1568         struct in6_rtmsg rtmsg;
1569
1570         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1571                 return -EINVAL;
1572         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1573 }
1574
1575 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1576 {
1577         struct rtmsg *r = NLMSG_DATA(nlh);
1578         struct in6_rtmsg rtmsg;
1579
1580         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1581                 return -EINVAL;
1582         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1583 }
1584
/*
 * Per-call context for rt6_dump_route(): inet6_dump_fib() fills one of
 * these on its stack and points the tree walker's args at it.
 */
1585 struct rt6_rtnl_dump_arg
1586 {
1587         struct sk_buff *skb;            /* buffer that route messages are written into */
1588         struct netlink_callback *cb;    /* dump state; supplies the request's nlh, pid and seq */
1589 };
1590
/*
 * rt6_fill_node - write one route into @skb as a netlink route message.
 * @skb:    buffer to write into
 * @rt:     route to describe
 * @dst:    when non-NULL, reported as the destination with prefix length
 *          128 instead of the route's own destination key
 * @src:    same as @dst for the source key (CONFIG_IPV6_SUBTREES only)
 * @iif:    when non-zero, emitted as RTA_IIF; otherwise, if @dst is given,
 *          a preferred source address is looked up and emitted
 * @type:   netlink message type
 * @pid:    netlink pid for the message header
 * @seq:    netlink sequence number for the message header
 * @prefix: non-zero restricts output to RTF_PREFIX_RT routes
 * @flags:  netlink message flags
 *
 * Returns 1 without writing anything when @prefix is set and @rt is not a
 * prefix route, skb->len after a successful write, or -1 after trimming
 * @skb back to its length on entry when the message did not fit.
 *
 * NOTE(review): nlmsg_failure is never the target of an explicit goto and
 * rtattr_failure of only one; presumably NLMSG_NEW() and RTA_PUT() jump to
 * them when @skb runs out of room - confirm against the macro definitions.
 */
1591 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1592                          struct in6_addr *dst, struct in6_addr *src,
1593                          int iif, int type, u32 pid, u32 seq,
1594                          int prefix, unsigned int flags)
1595 {
1596         struct rtmsg *rtm;
1597         struct nlmsghdr  *nlh;
             /* Remember where this message starts so it can be sized or undone. */
1598         unsigned char    *b = skb->tail;
1599         struct rta_cacheinfo ci;
1600
1601         if (prefix) {   /* user wants prefix routes only */
1602                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1603                         /* success since this is not a prefix route */
1604                         return 1;
1605                 }
1606         }
1607
1608         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1609         rtm = NLMSG_DATA(nlh);
1610         rtm->rtm_family = AF_INET6;
1611         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1612         rtm->rtm_src_len = rt->rt6i_src.plen;
1613         rtm->rtm_tos = 0;
1614         rtm->rtm_table = RT_TABLE_MAIN;
             /* Route type: reject routes first, then anything on a loopback device. */
1615         if (rt->rt6i_flags&RTF_REJECT)
1616                 rtm->rtm_type = RTN_UNREACHABLE;
1617         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1618                 rtm->rtm_type = RTN_LOCAL;
1619         else
1620                 rtm->rtm_type = RTN_UNICAST;
1621         rtm->rtm_flags = 0;
1622         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
             /*
              * Start from the stored protocol, then let the route flags override
              * it; RTF_DYNAMIC wins over RTF_ADDRCONF, which wins over RTF_DEFAULT.
              */
1623         rtm->rtm_protocol = rt->rt6i_protocol;
1624         if (rt->rt6i_flags&RTF_DYNAMIC)
1625                 rtm->rtm_protocol = RTPROT_REDIRECT;
1626         else if (rt->rt6i_flags & RTF_ADDRCONF)
1627                 rtm->rtm_protocol = RTPROT_KERNEL;
1628         else if (rt->rt6i_flags&RTF_DEFAULT)
1629                 rtm->rtm_protocol = RTPROT_RA;
1630
1631         if (rt->rt6i_flags&RTF_CACHE)
1632                 rtm->rtm_flags |= RTM_F_CLONED;
1633
             /* An explicit dst overrides the route's own key and is always a /128. */
1634         if (dst) {
1635                 RTA_PUT(skb, RTA_DST, 16, dst);
1636                 rtm->rtm_dst_len = 128;
1637         } else if (rtm->rtm_dst_len)
1638                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1639 #ifdef CONFIG_IPV6_SUBTREES
1640         if (src) {
1641                 RTA_PUT(skb, RTA_SRC, 16, src);
1642                 rtm->rtm_src_len = 128;
1643         } else if (rtm->rtm_src_len)
1644                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1645 #endif
1646         if (iif)
1647                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1648         else if (dst) {
                     /* RTA_PREFSRC is emitted only if ipv6_get_saddr() returns 0. */
1649                 struct in6_addr saddr_buf;
1650                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1651                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1652         }
1653         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1654                 goto rtattr_failure;
             /* The gateway reported is the neighbour's key, not rt6i_gateway. */
1655         if (rt->u.dst.neighbour)
1656                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1657         if (rt->u.dst.dev)
1658                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1659         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
             /* Cache info: times are converted from jiffies to clock_t units. */
1660         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1661         if (rt->rt6i_expires)
1662                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1663         else
1664                 ci.rta_expires = 0;
1665         ci.rta_used = rt->u.dst.__use;
1666         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1667         ci.rta_error = rt->u.dst.error;
1668         ci.rta_id = 0;
1669         ci.rta_ts = 0;
1670         ci.rta_tsage = 0;
1671         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
             /* Final message length: everything appended since entry. */
1672         nlh->nlmsg_len = skb->tail - b;
1673         return skb->len;
1674
1675 nlmsg_failure:
1676 rtattr_failure:
             /* Discard the partial message by trimming back to the entry position. */
1677         skb_trim(skb, b - skb->data);
1678         return -1;
1679 }
1680
1681 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1682 {
1683         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1684         int prefix;
1685
1686         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1687                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1688                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1689         } else
1690                 prefix = 0;
1691
1692         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1693                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1694                      prefix, NLM_F_MULTI);
1695 }
1696
1697 static int fib6_dump_node(struct fib6_walker_t *w)
1698 {
1699         int res;
1700         struct rt6_info *rt;
1701
1702         for (rt = w->leaf; rt; rt = rt->u.next) {
1703                 res = rt6_dump_route(rt, w->args);
1704                 if (res < 0) {
1705                         /* Frame is full, suspend walking */
1706                         w->leaf = rt;
1707                         return 1;
1708                 }
1709                 BUG_TRAP(res!=0);
1710         }
1711         w->leaf = NULL;
1712         return 0;
1713 }
1714
1715 static void fib6_dump_end(struct netlink_callback *cb)
1716 {
1717         struct fib6_walker_t *w = (void*)cb->args[0];
1718
1719         if (w) {
1720                 cb->args[0] = 0;
1721                 fib6_walker_unlink(w);
1722                 kfree(w);
1723         }
1724         cb->done = (void*)cb->args[1];
1725         cb->args[1] = 0;
1726 }
1727
1728 static int fib6_dump_done(struct netlink_callback *cb)
1729 {
1730         fib6_dump_end(cb);
1731         return cb->done ? cb->done(cb) : 0;
1732 }
1733
/*
 * inet6_dump_fib - netlink dump callback for the IPv6 routing table.
 * @skb: buffer to fill with route messages
 * @cb:  dump state that persists between calls
 *
 * The dump is resumable.  cb->args[0] stores the tree walker between calls
 * and cb->args[1] stores the ->done hook that was installed before this
 * dump replaced it with fib6_dump_done().  The first call (no walker yet)
 * allocates the walker and starts the walk; later calls continue it.  Each
 * walk step runs with rt6_lock read-held and bottom halves disabled.
 *
 * Returns -ENOMEM when the walker cannot be allocated, the walk's negative
 * result if it failed, and skb->len otherwise.  When the value returned is
 * <= 0 the walker has been torn down via fib6_dump_end().
 */
1734 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1735 {
1736         struct rt6_rtnl_dump_arg arg;
1737         struct fib6_walker_t *w;
1738         int res;
1739
1740         arg.skb = skb;
1741         arg.cb = cb;
1742
1743         w = (void*)cb->args[0];
1744         if (w == NULL) {
1745                 /* New dump:
1746                  * 
1747                  * 1. hook callback destructor.
1748                  */
1749                 cb->args[1] = (long)cb->done;
1750                 cb->done = fib6_dump_done;
1751
1752                 /*
1753                  * 2. allocate and initialize walker.
1754                  */
1755                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
                     /*
                      * On failure the hook installed above stays in place;
                      * fib6_dump_done() copes with a missing walker.
                      */
1756                 if (w == NULL)
1757                         return -ENOMEM;
1758                 RT6_TRACE("dump<%p", w);
1759                 memset(w, 0, sizeof(*w));
1760                 w->root = &ip6_routing_table;
1761                 w->func = fib6_dump_node;
1762                 w->args = &arg;
1763                 cb->args[0] = (long)w;
1764                 read_lock_bh(&rt6_lock);
1765                 res = fib6_walk(w);
1766                 read_unlock_bh(&rt6_lock);
1767         } else {
                     /*
                      * arg lives on this call's stack, so the walker must be
                      * pointed at the fresh copy on every continuation.
                      */
1768                 w->args = &arg;
1769                 read_lock_bh(&rt6_lock);
1770                 res = fib6_walk_continue(w);
1771                 read_unlock_bh(&rt6_lock);
1772         }
1773 #if RT6_DEBUG >= 3
1774         if (res <= 0 && skb->len == 0)
1775                 RT6_TRACE("%p>dump end\n", w);
1776 #endif
1777         res = res < 0 ? res : skb->len;
1778         /* res < 0 is an error. (really, impossible)
1779            res == 0 means that dump is complete, but skb still can contain data.
1780            res > 0 dump is not complete, but frame is full.
1781          */
1782         /* Destroy walker, if dump of this table is complete. */
1783         if (res <= 0)
1784                 fib6_dump_end(cb);
1785         return res;
1786 }
1787
1788 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1789 {
1790         struct rtattr **rta = arg;
1791         int iif = 0;
1792         int err = -ENOBUFS;
1793         struct sk_buff *skb;
1794         struct flowi fl;
1795         struct rt6_info *rt;
1796
1797         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1798         if (skb == NULL)
1799                 goto out;
1800
1801         /* Reserve room for dummy headers, this skb can pass
1802            through good chunk of routing engine.
1803          */
1804         skb->mac.raw = skb->data;
1805         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1806
1807         memset(&fl, 0, sizeof(fl));
1808         if (rta[RTA_SRC-1])
1809                 ipv6_addr_copy(&fl.fl6_src,
1810                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1811         if (rta[RTA_DST-1])
1812                 ipv6_addr_copy(&fl.fl6_dst,
1813                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1814
1815         if (rta[RTA_IIF-1])
1816                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1817
1818         if (iif) {
1819                 struct net_device *dev;
1820                 dev = __dev_get_by_index(iif);
1821                 if (!dev) {
1822                         err = -ENODEV;
1823                         goto out_free;
1824                 }
1825         }
1826
1827         fl.oif = 0;
1828         if (rta[RTA_OIF-1])
1829                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1830
1831         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1832
1833         skb->dst = &rt->u.dst;
1834
1835         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1836         err = rt6_fill_node(skb, rt, 
1837                             &fl.fl6_dst, &fl.fl6_src,
1838                             iif,
1839                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1840                             nlh->nlmsg_seq, 0, 0);
1841         if (err < 0) {
1842                 err = -EMSGSIZE;
1843                 goto out_free;
1844         }
1845
1846         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1847         if (err > 0)
1848                 err = 0;
1849 out:
1850         return err;
1851 out_free:
1852         kfree_skb(skb);
1853         goto out;       
1854 }
1855
1856 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1857                         struct netlink_skb_parms *req)
1858 {
1859         struct sk_buff *skb;
1860         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1861         u32 pid = current->pid;
1862         u32 seq = 0;
1863
1864         if (req)
1865                 pid = req->pid;
1866         if (nlh)
1867                 seq = nlh->nlmsg_seq;
1868         
1869         skb = alloc_skb(size, gfp_any());
1870         if (!skb) {
1871                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1872                 return;
1873         }
1874         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1875                 kfree_skb(skb);
1876                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1877                 return;
1878         }
1879         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1880         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1881 }
1882
1883 /*
1884  *      /proc
1885  */
1886
1887 #ifdef CONFIG_PROC_FS
1888
/*
 * Size in bytes that the /proc reader assumes for one formatted route
 * record; used to turn a byte offset into a record count.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State threaded through rt6_info_route() while filling a /proc buffer. */
struct rt6_proc_arg {
	char *buffer;	/* destination buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes the reader asked for */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
1899
1900 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1901 {
1902         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1903         int i;
1904
1905         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1906                 arg->skip++;
1907                 return 0;
1908         }
1909
1910         if (arg->len >= arg->length)
1911                 return 0;
1912
1913         for (i=0; i<16; i++) {
1914                 sprintf(arg->buffer + arg->len, "%02x",
1915                         rt->rt6i_dst.addr.s6_addr[i]);
1916                 arg->len += 2;
1917         }
1918         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1919                             rt->rt6i_dst.plen);
1920
1921 #ifdef CONFIG_IPV6_SUBTREES
1922         for (i=0; i<16; i++) {
1923                 sprintf(arg->buffer + arg->len, "%02x",
1924                         rt->rt6i_src.addr.s6_addr[i]);
1925                 arg->len += 2;
1926         }
1927         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1928                             rt->rt6i_src.plen);
1929 #else
1930         sprintf(arg->buffer + arg->len,
1931                 "00000000000000000000000000000000 00 ");
1932         arg->len += 36;
1933 #endif
1934
1935         if (rt->rt6i_nexthop) {
1936                 for (i=0; i<16; i++) {
1937                         sprintf(arg->buffer + arg->len, "%02x",
1938                                 rt->rt6i_nexthop->primary_key[i]);
1939                         arg->len += 2;
1940                 }
1941         } else {
1942                 sprintf(arg->buffer + arg->len,
1943                         "00000000000000000000000000000000");
1944                 arg->len += 32;
1945         }
1946         arg->len += sprintf(arg->buffer + arg->len,
1947                             " %08x %08x %08x %08x %8s\n",
1948                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1949                             rt->u.dst.__use, rt->rt6i_flags, 
1950                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1951         return 0;
1952 }
1953
1954 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1955 {
1956         struct rt6_proc_arg arg;
1957         arg.buffer = buffer;
1958         arg.offset = offset;
1959         arg.length = length;
1960         arg.skip = 0;
1961         arg.len = 0;
1962
1963         read_lock_bh(&rt6_lock);
1964         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1965         read_unlock_bh(&rt6_lock);
1966
1967         *start = buffer;
1968         if (offset)
1969                 *start += offset % RT6_INFO_LEN;
1970
1971         arg.len -= offset % RT6_INFO_LEN;
1972
1973         if (arg.len > length)
1974                 arg.len = length;
1975         if (arg.len < 0)
1976                 arg.len = 0;
1977
1978         return arg.len;
1979 }
1980
1981 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1982 {
1983         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1984                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1985                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1986                       rt6_stats.fib_rt_cache,
1987                       atomic_read(&ip6_dst_ops.entries),
1988                       rt6_stats.fib_discarded_routes);
1989
1990         return 0;
1991 }
1992
1993 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1994 {
1995         return single_open(file, rt6_stats_seq_show, NULL);
1996 }
1997
1998 static struct file_operations rt6_stats_seq_fops = {
1999         .owner   = THIS_MODULE,
2000         .open    = rt6_stats_seq_open,
2001         .read    = seq_read,
2002         .llseek  = seq_lseek,
2003         .release = single_release,
2004 };
2005 #endif  /* CONFIG_PROC_FS */
2006
2007 #ifdef CONFIG_SYSCTL
2008
2009 static int flush_delay;
2010
2011 static
2012 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2013                               void __user *buffer, size_t *lenp, loff_t *ppos)
2014 {
2015         if (write) {
2016                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2017                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2018                 return 0;
2019         } else
2020                 return -EINVAL;
2021 }
2022
2023 ctl_table ipv6_route_table[] = {
2024         {
2025                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2026                 .procname       =       "flush",
2027                 .data           =       &flush_delay,
2028                 .maxlen         =       sizeof(int),
2029                 .mode           =       0200,
2030                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2031         },
2032         {
2033                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2034                 .procname       =       "gc_thresh",
2035                 .data           =       &ip6_dst_ops.gc_thresh,
2036                 .maxlen         =       sizeof(int),
2037                 .mode           =       0644,
2038                 .proc_handler   =       &proc_dointvec,
2039         },
2040         {
2041                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2042                 .procname       =       "max_size",
2043                 .data           =       &ip6_rt_max_size,
2044                 .maxlen         =       sizeof(int),
2045                 .mode           =       0644,
2046                 .proc_handler   =       &proc_dointvec,
2047         },
2048         {
2049                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2050                 .procname       =       "gc_min_interval",
2051                 .data           =       &ip6_rt_gc_min_interval,
2052                 .maxlen         =       sizeof(int),
2053                 .mode           =       0644,
2054                 .proc_handler   =       &proc_dointvec_jiffies,
2055                 .strategy       =       &sysctl_jiffies,
2056         },
2057         {
2058                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2059                 .procname       =       "gc_timeout",
2060                 .data           =       &ip6_rt_gc_timeout,
2061                 .maxlen         =       sizeof(int),
2062                 .mode           =       0644,
2063                 .proc_handler   =       &proc_dointvec_jiffies,
2064                 .strategy       =       &sysctl_jiffies,
2065         },
2066         {
2067                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2068                 .procname       =       "gc_interval",
2069                 .data           =       &ip6_rt_gc_interval,
2070                 .maxlen         =       sizeof(int),
2071                 .mode           =       0644,
2072                 .proc_handler   =       &proc_dointvec_jiffies,
2073                 .strategy       =       &sysctl_jiffies,
2074         },
2075         {
2076                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2077                 .procname       =       "gc_elasticity",
2078                 .data           =       &ip6_rt_gc_elasticity,
2079                 .maxlen         =       sizeof(int),
2080                 .mode           =       0644,
2081                 .proc_handler   =       &proc_dointvec_jiffies,
2082                 .strategy       =       &sysctl_jiffies,
2083         },
2084         {
2085                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2086                 .procname       =       "mtu_expires",
2087                 .data           =       &ip6_rt_mtu_expires,
2088                 .maxlen         =       sizeof(int),
2089                 .mode           =       0644,
2090                 .proc_handler   =       &proc_dointvec_jiffies,
2091                 .strategy       =       &sysctl_jiffies,
2092         },
2093         {
2094                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2095                 .procname       =       "min_adv_mss",
2096                 .data           =       &ip6_rt_min_advmss,
2097                 .maxlen         =       sizeof(int),
2098                 .mode           =       0644,
2099                 .proc_handler   =       &proc_dointvec_jiffies,
2100                 .strategy       =       &sysctl_jiffies,
2101         },
2102         {
2103                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2104                 .procname       =       "gc_min_interval_ms",
2105                 .data           =       &ip6_rt_gc_min_interval,
2106                 .maxlen         =       sizeof(int),
2107                 .mode           =       0644,
2108                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2109                 .strategy       =       &sysctl_ms_jiffies,
2110         },
2111         { .ctl_name = 0 }
2112 };
2113
2114 #endif
2115
2116 void __init ip6_route_init(void)
2117 {
2118         struct proc_dir_entry *p;
2119
2120         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2121                                                      sizeof(struct rt6_info),
2122                                                      0, SLAB_HWCACHE_ALIGN,
2123                                                      NULL, NULL);
2124         if (!ip6_dst_ops.kmem_cachep)
2125                 panic("cannot create ip6_dst_cache");
2126
2127         fib6_init();
2128 #ifdef  CONFIG_PROC_FS
2129         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2130         if (p)
2131                 p->owner = THIS_MODULE;
2132
2133         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2134 #endif
2135 #ifdef CONFIG_XFRM
2136         xfrm6_init();
2137 #endif
2138 }
2139
2140 void ip6_route_cleanup(void)
2141 {
2142 #ifdef CONFIG_PROC_FS
2143         proc_net_remove("ipv6_route");
2144         proc_net_remove("rt6_stats");
2145 #endif
2146 #ifdef CONFIG_XFRM
2147         xfrm6_fini();
2148 #endif
2149         rt6_ifdown(NULL);
2150         fib6_gc_cleanup();
2151         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2152 }