]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[IPV6]: ROUTE: Copy u.dst.error for RTF_REJECT routes when cloning.
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; below that both compile to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
75
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void             ip6_dst_destroy(struct dst_entry *);
88 static void             ip6_dst_ifdown(struct dst_entry *,
89                                        struct net_device *dev, int how);
90 static int               ip6_dst_gc(void);
91
92 static int              ip6_pkt_discard(struct sk_buff *skb);
93 static int              ip6_pkt_discard_out(struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
97 static struct dst_ops ip6_dst_ops = {
98         .family                 =       AF_INET6,
99         .protocol               =       __constant_htons(ETH_P_IPV6),
100         .gc                     =       ip6_dst_gc,
101         .gc_thresh              =       1024,
102         .check                  =       ip6_dst_check,
103         .destroy                =       ip6_dst_destroy,
104         .ifdown                 =       ip6_dst_ifdown,
105         .negative_advice        =       ip6_negative_advice,
106         .link_failure           =       ip6_link_failure,
107         .update_pmtu            =       ip6_rt_update_pmtu,
108         .entry_size             =       sizeof(struct rt6_info),
109 };
110
111 struct rt6_info ip6_null_entry = {
112         .u = {
113                 .dst = {
114                         .__refcnt       = ATOMIC_INIT(1),
115                         .__use          = 1,
116                         .dev            = &loopback_dev,
117                         .obsolete       = -1,
118                         .error          = -ENETUNREACH,
119                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
120                         .input          = ip6_pkt_discard,
121                         .output         = ip6_pkt_discard_out,
122                         .ops            = &ip6_dst_ops,
123                         .path           = (struct dst_entry*)&ip6_null_entry,
124                 }
125         },
126         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
127         .rt6i_metric    = ~(u32) 0,
128         .rt6i_ref       = ATOMIC_INIT(1),
129 };
130
131 struct fib6_node ip6_routing_table = {
132         .leaf           = &ip6_null_entry,
133         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 };
135
136 /* Protects all the ip6 fib */
137
138 DEFINE_RWLOCK(rt6_lock);
139
140
141 /* allocate dst with ip6_dst_ops */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 {
144         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145 }
146
147 static void ip6_dst_destroy(struct dst_entry *dst)
148 {
149         struct rt6_info *rt = (struct rt6_info *)dst;
150         struct inet6_dev *idev = rt->rt6i_idev;
151
152         if (idev != NULL) {
153                 rt->rt6i_idev = NULL;
154                 in6_dev_put(idev);
155         }       
156 }
157
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159                            int how)
160 {
161         struct rt6_info *rt = (struct rt6_info *)dst;
162         struct inet6_dev *idev = rt->rt6i_idev;
163
164         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166                 if (loopback_idev != NULL) {
167                         rt->rt6i_idev = loopback_idev;
168                         in6_dev_put(idev);
169                 }
170         }
171 }
172
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 {
175         return (rt->rt6i_flags & RTF_EXPIRES &&
176                 time_after(jiffies, rt->rt6i_expires));
177 }
178
179 /*
180  *      Route lookup. Any rt6_lock is implied.
181  */
182
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184                                                     int oif,
185                                                     int strict)
186 {
187         struct rt6_info *local = NULL;
188         struct rt6_info *sprt;
189
190         if (oif) {
191                 for (sprt = rt; sprt; sprt = sprt->u.next) {
192                         struct net_device *dev = sprt->rt6i_dev;
193                         if (dev->ifindex == oif)
194                                 return sprt;
195                         if (dev->flags & IFF_LOOPBACK) {
196                                 if (sprt->rt6i_idev == NULL ||
197                                     sprt->rt6i_idev->dev->ifindex != oif) {
198                                         if (strict && oif)
199                                                 continue;
200                                         if (local && (!oif || 
201                                                       local->rt6i_idev->dev->ifindex == oif))
202                                                 continue;
203                                 }
204                                 local = sprt;
205                         }
206                 }
207
208                 if (local)
209                         return local;
210
211                 if (strict)
212                         return &ip6_null_entry;
213         }
214         return rt;
215 }
216
217 /*
218  *      pointer to the last default router chosen. BH is disabled locally.
219  */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 {
225         spin_lock_bh(&rt6_dflt_lock);
226         if (rt == NULL || rt == rt6_dflt_pointer) {
227                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228                 rt6_dflt_pointer = NULL;
229         }
230         spin_unlock_bh(&rt6_dflt_lock);
231 }
232
233 /* Default Router Selection (RFC 2461 6.3.6) */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235 {
236         struct rt6_info *match = NULL;
237         struct rt6_info *sprt;
238         int mpri = 0;
239
240         for (sprt = rt; sprt; sprt = sprt->u.next) {
241                 struct neighbour *neigh;
242                 int m = 0;
243
244                 if (!oif ||
245                     (sprt->rt6i_dev &&
246                      sprt->rt6i_dev->ifindex == oif))
247                         m += 8;
248
249                 if (rt6_check_expired(sprt))
250                         continue;
251
252                 if (sprt == rt6_dflt_pointer)
253                         m += 4;
254
255                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256                         read_lock_bh(&neigh->lock);
257                         switch (neigh->nud_state) {
258                         case NUD_REACHABLE:
259                                 m += 3;
260                                 break;
261
262                         case NUD_STALE:
263                         case NUD_DELAY:
264                         case NUD_PROBE:
265                                 m += 2;
266                                 break;
267
268                         case NUD_NOARP:
269                         case NUD_PERMANENT:
270                                 m += 1;
271                                 break;
272
273                         case NUD_INCOMPLETE:
274                         default:
275                                 read_unlock_bh(&neigh->lock);
276                                 continue;
277                         }
278                         read_unlock_bh(&neigh->lock);
279                 } else {
280                         continue;
281                 }
282
283                 if (m > mpri || m >= 12) {
284                         match = sprt;
285                         mpri = m;
286                         if (m >= 12) {
287                                 /* we choose the last default router if it
288                                  * is in (probably) reachable state.
289                                  * If route changed, we should do pmtu
290                                  * discovery. --yoshfuji
291                                  */
292                                 break;
293                         }
294                 }
295         }
296
297         spin_lock(&rt6_dflt_lock);
298         if (!match) {
299                 /*
300                  *      No default routers are known to be reachable.
301                  *      SHOULD round robin
302                  */
303                 if (rt6_dflt_pointer) {
304                         for (sprt = rt6_dflt_pointer->u.next;
305                              sprt; sprt = sprt->u.next) {
306                                 if (sprt->u.dst.obsolete <= 0 &&
307                                     sprt->u.dst.error == 0 &&
308                                     !rt6_check_expired(sprt)) {
309                                         match = sprt;
310                                         break;
311                                 }
312                         }
313                         for (sprt = rt;
314                              !match && sprt;
315                              sprt = sprt->u.next) {
316                                 if (sprt->u.dst.obsolete <= 0 &&
317                                     sprt->u.dst.error == 0 &&
318                                     !rt6_check_expired(sprt)) {
319                                         match = sprt;
320                                         break;
321                                 }
322                                 if (sprt == rt6_dflt_pointer)
323                                         break;
324                         }
325                 }
326         }
327
328         if (match) {
329                 if (rt6_dflt_pointer != match)
330                         RT6_TRACE("changed default router: %p->%p\n",
331                                   rt6_dflt_pointer, match);
332                 rt6_dflt_pointer = match;
333         }
334         spin_unlock(&rt6_dflt_lock);
335
336         if (!match) {
337                 /*
338                  * Last Resort: if no default routers found, 
339                  * use addrconf default route.
340                  * We don't record this route.
341                  */
342                 for (sprt = ip6_routing_table.leaf;
343                      sprt; sprt = sprt->u.next) {
344                         if (!rt6_check_expired(sprt) &&
345                             (sprt->rt6i_flags & RTF_DEFAULT) &&
346                             (!oif ||
347                              (sprt->rt6i_dev &&
348                               sprt->rt6i_dev->ifindex == oif))) {
349                                 match = sprt;
350                                 break;
351                         }
352                 }
353                 if (!match) {
354                         /* no default route.  give up. */
355                         match = &ip6_null_entry;
356                 }
357         }
358
359         return match;
360 }
361
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363                             int oif, int strict)
364 {
365         struct fib6_node *fn;
366         struct rt6_info *rt;
367
368         read_lock_bh(&rt6_lock);
369         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370         rt = rt6_device_match(fn->leaf, oif, strict);
371         dst_hold(&rt->u.dst);
372         rt->u.dst.__use++;
373         read_unlock_bh(&rt6_lock);
374
375         rt->u.dst.lastuse = jiffies;
376         if (rt->u.dst.error == 0)
377                 return rt;
378         dst_release(&rt->u.dst);
379         return NULL;
380 }
381
382 /* ip6_ins_rt is called with FREE rt6_lock.
383    It takes new route entry, the addition fails by any reason the
384    route is freed. In any case, if caller does not hold it, it may
385    be destroyed.
386  */
387
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389                 void *_rtattr, struct netlink_skb_parms *req)
390 {
391         int err;
392
393         write_lock_bh(&rt6_lock);
394         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395         write_unlock_bh(&rt6_lock);
396
397         return err;
398 }
399
400 /* No rt6_lock! If COW failed, the function returns dead route entry
401    with dst->error set to errno value.
402  */
403
404 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405                                       struct in6_addr *saddr)
406 {
407         struct rt6_info *rt;
408
409         /*
410          *      Clone the route.
411          */
412
413         rt = ip6_rt_copy(ort);
414
415         if (rt) {
416                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417                         if (rt->rt6i_dst.plen != 128 &&
418                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419                                 rt->rt6i_flags |= RTF_ANYCAST;
420                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
421                 }
422
423                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424                 rt->rt6i_dst.plen = 128;
425                 rt->rt6i_flags |= RTF_CACHE;
426                 rt->u.dst.flags |= DST_HOST;
427
428 #ifdef CONFIG_IPV6_SUBTREES
429                 if (rt->rt6i_src.plen && saddr) {
430                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431                         rt->rt6i_src.plen = 128;
432                 }
433 #endif
434
435                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
437         }
438
439         return rt;
440 }
441
442 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
444 {
445         struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
446         int err;
447
448         if (!rt) {
449                 dst_hold(&ip6_null_entry.u.dst);
450                 return &ip6_null_entry;
451         }
452
453         dst_hold(&rt->u.dst);
454
455         err = ip6_ins_rt(rt, NULL, NULL, req);
456         if (err)
457                 rt->u.dst.error = err;
458
459         return rt;
460 }
461
/*
 * Strict-lookup backtracking, for use inside the route lookup functions.
 *
 * Relies on the caller's locals `rt`, `fn` and `strict` and on the labels
 * `out` and `restart`.  When a strict lookup ended on &ip6_null_entry, walk
 * up the tree through fn->parent:
 *  - on reaching a RTN_ROOT node, take a reference on rt and jump to `out`;
 *  - on reaching a node flagged RTN_RTINFO, jump to `restart` to retry the
 *    match on that node.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
473
474
475 void ip6_route_input(struct sk_buff *skb)
476 {
477         struct fib6_node *fn;
478         struct rt6_info *rt;
479         int strict;
480         int attempts = 3;
481
482         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
483
484 relookup:
485         read_lock_bh(&rt6_lock);
486
487         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
488                          &skb->nh.ipv6h->saddr);
489
490 restart:
491         rt = fn->leaf;
492
493         if ((rt->rt6i_flags & RTF_CACHE)) {
494                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
495                 BACKTRACK();
496                 dst_hold(&rt->u.dst);
497                 goto out;
498         }
499
500         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
501         BACKTRACK();
502
503         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
504                 struct rt6_info *nrt;
505                 dst_hold(&rt->u.dst);
506                 read_unlock_bh(&rt6_lock);
507
508                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
509                               &skb->nh.ipv6h->saddr,
510                               &NETLINK_CB(skb));
511
512                 dst_release(&rt->u.dst);
513                 rt = nrt;
514
515                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
516                         goto out2;
517
518                 /* Race condition! In the gap, when rt6_lock was
519                    released someone could insert this route.  Relookup.
520                 */
521                 dst_release(&rt->u.dst);
522                 goto relookup;
523         }
524         dst_hold(&rt->u.dst);
525
526 out:
527         read_unlock_bh(&rt6_lock);
528 out2:
529         rt->u.dst.lastuse = jiffies;
530         rt->u.dst.__use++;
531         skb->dst = (struct dst_entry *) rt;
532 }
533
534 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
535 {
536         struct fib6_node *fn;
537         struct rt6_info *rt;
538         int strict;
539         int attempts = 3;
540
541         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
542
543 relookup:
544         read_lock_bh(&rt6_lock);
545
546         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
547
548 restart:
549         rt = fn->leaf;
550
551         if ((rt->rt6i_flags & RTF_CACHE)) {
552                 rt = rt6_device_match(rt, fl->oif, strict);
553                 BACKTRACK();
554                 dst_hold(&rt->u.dst);
555                 goto out;
556         }
557         if (rt->rt6i_flags & RTF_DEFAULT) {
558                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
559                         rt = rt6_best_dflt(rt, fl->oif);
560         } else {
561                 rt = rt6_device_match(rt, fl->oif, strict);
562                 BACKTRACK();
563         }
564
565         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
566                 struct rt6_info *nrt;
567                 dst_hold(&rt->u.dst);
568                 read_unlock_bh(&rt6_lock);
569
570                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
571
572                 dst_release(&rt->u.dst);
573                 rt = nrt;
574
575                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
576                         goto out2;
577
578                 /* Race condition! In the gap, when rt6_lock was
579                    released someone could insert this route.  Relookup.
580                 */
581                 dst_release(&rt->u.dst);
582                 goto relookup;
583         }
584         dst_hold(&rt->u.dst);
585
586 out:
587         read_unlock_bh(&rt6_lock);
588 out2:
589         rt->u.dst.lastuse = jiffies;
590         rt->u.dst.__use++;
591         return &rt->u.dst;
592 }
593
594
595 /*
596  *      Destination cache support functions
597  */
598
599 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
600 {
601         struct rt6_info *rt;
602
603         rt = (struct rt6_info *) dst;
604
605         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
606                 return dst;
607
608         return NULL;
609 }
610
611 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
612 {
613         struct rt6_info *rt = (struct rt6_info *) dst;
614
615         if (rt) {
616                 if (rt->rt6i_flags & RTF_CACHE)
617                         ip6_del_rt(rt, NULL, NULL, NULL);
618                 else
619                         dst_release(dst);
620         }
621         return NULL;
622 }
623
624 static void ip6_link_failure(struct sk_buff *skb)
625 {
626         struct rt6_info *rt;
627
628         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
629
630         rt = (struct rt6_info *) skb->dst;
631         if (rt) {
632                 if (rt->rt6i_flags&RTF_CACHE) {
633                         dst_set_expires(&rt->u.dst, 0);
634                         rt->rt6i_flags |= RTF_EXPIRES;
635                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
636                         rt->rt6i_node->fn_sernum = -1;
637         }
638 }
639
640 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
641 {
642         struct rt6_info *rt6 = (struct rt6_info*)dst;
643
644         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
645                 rt6->rt6i_flags |= RTF_MODIFIED;
646                 if (mtu < IPV6_MIN_MTU) {
647                         mtu = IPV6_MIN_MTU;
648                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
649                 }
650                 dst->metrics[RTAX_MTU-1] = mtu;
651         }
652 }
653
654 /* Protected by rt6_lock.  */
655 static struct dst_entry *ndisc_dst_gc_list;
656 static int ipv6_get_mtu(struct net_device *dev);
657
658 static inline unsigned int ipv6_advmss(unsigned int mtu)
659 {
660         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
661
662         if (mtu < ip6_rt_min_advmss)
663                 mtu = ip6_rt_min_advmss;
664
665         /*
666          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
667          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
668          * IPV6_MAXPLEN is also valid and means: "any MSS, 
669          * rely only on pmtu discovery"
670          */
671         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
672                 mtu = IPV6_MAXPLEN;
673         return mtu;
674 }
675
676 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
677                                   struct neighbour *neigh,
678                                   struct in6_addr *addr,
679                                   int (*output)(struct sk_buff *))
680 {
681         struct rt6_info *rt;
682         struct inet6_dev *idev = in6_dev_get(dev);
683
684         if (unlikely(idev == NULL))
685                 return NULL;
686
687         rt = ip6_dst_alloc();
688         if (unlikely(rt == NULL)) {
689                 in6_dev_put(idev);
690                 goto out;
691         }
692
693         dev_hold(dev);
694         if (neigh)
695                 neigh_hold(neigh);
696         else
697                 neigh = ndisc_get_neigh(dev, addr);
698
699         rt->rt6i_dev      = dev;
700         rt->rt6i_idev     = idev;
701         rt->rt6i_nexthop  = neigh;
702         atomic_set(&rt->u.dst.__refcnt, 1);
703         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
704         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
705         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
706         rt->u.dst.output  = output;
707
708 #if 0   /* there's no chance to use these for ndisc */
709         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
710                                 ? DST_HOST 
711                                 : 0;
712         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
713         rt->rt6i_dst.plen = 128;
714 #endif
715
716         write_lock_bh(&rt6_lock);
717         rt->u.dst.next = ndisc_dst_gc_list;
718         ndisc_dst_gc_list = &rt->u.dst;
719         write_unlock_bh(&rt6_lock);
720
721         fib6_force_start_gc();
722
723 out:
724         return (struct dst_entry *)rt;
725 }
726
727 int ndisc_dst_gc(int *more)
728 {
729         struct dst_entry *dst, *next, **pprev;
730         int freed;
731
732         next = NULL;
733         pprev = &ndisc_dst_gc_list;
734         freed = 0;
735         while ((dst = *pprev) != NULL) {
736                 if (!atomic_read(&dst->__refcnt)) {
737                         *pprev = dst->next;
738                         dst_free(dst);
739                         freed++;
740                 } else {
741                         pprev = &dst->next;
742                         (*more)++;
743                 }
744         }
745
746         return freed;
747 }
748
749 static int ip6_dst_gc(void)
750 {
751         static unsigned expire = 30*HZ;
752         static unsigned long last_gc;
753         unsigned long now = jiffies;
754
755         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
756             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
757                 goto out;
758
759         expire++;
760         fib6_run_gc(expire);
761         last_gc = now;
762         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
763                 expire = ip6_rt_gc_timeout>>1;
764
765 out:
766         expire -= expire>>ip6_rt_gc_elasticity;
767         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
768 }
769
770 /* Clean host part of a prefix. Not necessary in radix tree,
771    but results in cleaner routing tables.
772
773    Remove it only when all the things will work!
774  */
775
776 static int ipv6_get_mtu(struct net_device *dev)
777 {
778         int mtu = IPV6_MIN_MTU;
779         struct inet6_dev *idev;
780
781         idev = in6_dev_get(dev);
782         if (idev) {
783                 mtu = idev->cnf.mtu6;
784                 in6_dev_put(idev);
785         }
786         return mtu;
787 }
788
789 int ipv6_get_hoplimit(struct net_device *dev)
790 {
791         int hoplimit = ipv6_devconf.hop_limit;
792         struct inet6_dev *idev;
793
794         idev = in6_dev_get(dev);
795         if (idev) {
796                 hoplimit = idev->cnf.hop_limit;
797                 in6_dev_put(idev);
798         }
799         return hoplimit;
800 }
801
802 /*
803  *
804  */
805
806 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
807                 void *_rtattr, struct netlink_skb_parms *req)
808 {
809         int err;
810         struct rtmsg *r;
811         struct rtattr **rta;
812         struct rt6_info *rt = NULL;
813         struct net_device *dev = NULL;
814         struct inet6_dev *idev = NULL;
815         int addr_type;
816
817         rta = (struct rtattr **) _rtattr;
818
819         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
820                 return -EINVAL;
821 #ifndef CONFIG_IPV6_SUBTREES
822         if (rtmsg->rtmsg_src_len)
823                 return -EINVAL;
824 #endif
825         if (rtmsg->rtmsg_ifindex) {
826                 err = -ENODEV;
827                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
828                 if (!dev)
829                         goto out;
830                 idev = in6_dev_get(dev);
831                 if (!idev)
832                         goto out;
833         }
834
835         if (rtmsg->rtmsg_metric == 0)
836                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
837
838         rt = ip6_dst_alloc();
839
840         if (rt == NULL) {
841                 err = -ENOMEM;
842                 goto out;
843         }
844
845         rt->u.dst.obsolete = -1;
846         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
847         if (nlh && (r = NLMSG_DATA(nlh))) {
848                 rt->rt6i_protocol = r->rtm_protocol;
849         } else {
850                 rt->rt6i_protocol = RTPROT_BOOT;
851         }
852
853         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
854
855         if (addr_type & IPV6_ADDR_MULTICAST)
856                 rt->u.dst.input = ip6_mc_input;
857         else
858                 rt->u.dst.input = ip6_forward;
859
860         rt->u.dst.output = ip6_output;
861
862         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
863                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
864         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
865         if (rt->rt6i_dst.plen == 128)
866                rt->u.dst.flags = DST_HOST;
867
868 #ifdef CONFIG_IPV6_SUBTREES
869         ipv6_addr_prefix(&rt->rt6i_src.addr, 
870                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
871         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
872 #endif
873
874         rt->rt6i_metric = rtmsg->rtmsg_metric;
875
876         /* We cannot add true routes via loopback here,
877            they would result in kernel looping; promote them to reject routes
878          */
879         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
880             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
881                 /* hold loopback dev/idev if we haven't done so. */
882                 if (dev != &loopback_dev) {
883                         if (dev) {
884                                 dev_put(dev);
885                                 in6_dev_put(idev);
886                         }
887                         dev = &loopback_dev;
888                         dev_hold(dev);
889                         idev = in6_dev_get(dev);
890                         if (!idev) {
891                                 err = -ENODEV;
892                                 goto out;
893                         }
894                 }
895                 rt->u.dst.output = ip6_pkt_discard_out;
896                 rt->u.dst.input = ip6_pkt_discard;
897                 rt->u.dst.error = -ENETUNREACH;
898                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
899                 goto install_route;
900         }
901
902         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
903                 struct in6_addr *gw_addr;
904                 int gwa_type;
905
906                 gw_addr = &rtmsg->rtmsg_gateway;
907                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
908                 gwa_type = ipv6_addr_type(gw_addr);
909
910                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
911                         struct rt6_info *grt;
912
913                         /* IPv6 strictly inhibits using not link-local
914                            addresses as nexthop address.
915                            Otherwise, router will not able to send redirects.
916                            It is very good, but in some (rare!) circumstances
917                            (SIT, PtP, NBMA NOARP links) it is handy to allow
918                            some exceptions. --ANK
919                          */
920                         err = -EINVAL;
921                         if (!(gwa_type&IPV6_ADDR_UNICAST))
922                                 goto out;
923
924                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
925
926                         err = -EHOSTUNREACH;
927                         if (grt == NULL)
928                                 goto out;
929                         if (dev) {
930                                 if (dev != grt->rt6i_dev) {
931                                         dst_release(&grt->u.dst);
932                                         goto out;
933                                 }
934                         } else {
935                                 dev = grt->rt6i_dev;
936                                 idev = grt->rt6i_idev;
937                                 dev_hold(dev);
938                                 in6_dev_hold(grt->rt6i_idev);
939                         }
940                         if (!(grt->rt6i_flags&RTF_GATEWAY))
941                                 err = 0;
942                         dst_release(&grt->u.dst);
943
944                         if (err)
945                                 goto out;
946                 }
947                 err = -EINVAL;
948                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
949                         goto out;
950         }
951
952         err = -ENODEV;
953         if (dev == NULL)
954                 goto out;
955
956         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
957                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
958                 if (IS_ERR(rt->rt6i_nexthop)) {
959                         err = PTR_ERR(rt->rt6i_nexthop);
960                         rt->rt6i_nexthop = NULL;
961                         goto out;
962                 }
963         }
964
965         rt->rt6i_flags = rtmsg->rtmsg_flags;
966
967 install_route:
968         if (rta && rta[RTA_METRICS-1]) {
969                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
970                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
971
972                 while (RTA_OK(attr, attrlen)) {
973                         unsigned flavor = attr->rta_type;
974                         if (flavor) {
975                                 if (flavor > RTAX_MAX) {
976                                         err = -EINVAL;
977                                         goto out;
978                                 }
979                                 rt->u.dst.metrics[flavor-1] =
980                                         *(u32 *)RTA_DATA(attr);
981                         }
982                         attr = RTA_NEXT(attr, attrlen);
983                 }
984         }
985
986         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
987                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
988         if (!rt->u.dst.metrics[RTAX_MTU-1])
989                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
990         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
991                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
992         rt->u.dst.dev = dev;
993         rt->rt6i_idev = idev;
994         return ip6_ins_rt(rt, nlh, _rtattr, req);
995
996 out:
997         if (dev)
998                 dev_put(dev);
999         if (idev)
1000                 in6_dev_put(idev);
1001         if (rt)
1002                 dst_free((struct dst_entry *) rt);
1003         return err;
1004 }
1005
1006 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1007 {
1008         int err;
1009
1010         write_lock_bh(&rt6_lock);
1011
1012         rt6_reset_dflt_pointer(NULL);
1013
1014         err = fib6_del(rt, nlh, _rtattr, req);
1015         dst_release(&rt->u.dst);
1016
1017         write_unlock_bh(&rt6_lock);
1018
1019         return err;
1020 }
1021
1022 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1023 {
1024         struct fib6_node *fn;
1025         struct rt6_info *rt;
1026         int err = -ESRCH;
1027
1028         read_lock_bh(&rt6_lock);
1029
1030         fn = fib6_locate(&ip6_routing_table,
1031                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1032                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1033         
1034         if (fn) {
1035                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1036                         if (rtmsg->rtmsg_ifindex &&
1037                             (rt->rt6i_dev == NULL ||
1038                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1039                                 continue;
1040                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1041                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1042                                 continue;
1043                         if (rtmsg->rtmsg_metric &&
1044                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1045                                 continue;
1046                         dst_hold(&rt->u.dst);
1047                         read_unlock_bh(&rt6_lock);
1048
1049                         return ip6_del_rt(rt, nlh, _rtattr, req);
1050                 }
1051         }
1052         read_unlock_bh(&rt6_lock);
1053
1054         return err;
1055 }
1056
1057 /*
1058  *      Handle redirects
1059  */
1060 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1061                   struct neighbour *neigh, u8 *lladdr, int on_link)
1062 {
1063         struct rt6_info *rt, *nrt;
1064
1065         /* Locate old route to this destination. */
1066         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1067
1068         if (rt == NULL)
1069                 return;
1070
1071         if (neigh->dev != rt->rt6i_dev)
1072                 goto out;
1073
1074         /*
1075          * Current route is on-link; redirect is always invalid.
1076          * 
1077          * Seems, previous statement is not true. It could
1078          * be node, which looks for us as on-link (f.e. proxy ndisc)
1079          * But then router serving it might decide, that we should
1080          * know truth 8)8) --ANK (980726).
1081          */
1082         if (!(rt->rt6i_flags&RTF_GATEWAY))
1083                 goto out;
1084
1085         /*
1086          *      RFC 2461 specifies that redirects should only be
1087          *      accepted if they come from the nexthop to the target.
1088          *      Due to the way default routers are chosen, this notion
1089          *      is a bit fuzzy and one might need to check all default
1090          *      routers.
1091          */
1092         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1093                 if (rt->rt6i_flags & RTF_DEFAULT) {
1094                         struct rt6_info *rt1;
1095
1096                         read_lock(&rt6_lock);
1097                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1098                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1099                                         dst_hold(&rt1->u.dst);
1100                                         dst_release(&rt->u.dst);
1101                                         read_unlock(&rt6_lock);
1102                                         rt = rt1;
1103                                         goto source_ok;
1104                                 }
1105                         }
1106                         read_unlock(&rt6_lock);
1107                 }
1108                 if (net_ratelimit())
1109                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1110                                "for redirect target\n");
1111                 goto out;
1112         }
1113
1114 source_ok:
1115
1116         /*
1117          *      We have finally decided to accept it.
1118          */
1119
1120         neigh_update(neigh, lladdr, NUD_STALE, 
1121                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1122                      NEIGH_UPDATE_F_OVERRIDE|
1123                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1124                                      NEIGH_UPDATE_F_ISROUTER))
1125                      );
1126
1127         /*
1128          * Redirect received -> path was valid.
1129          * Look, redirects are sent only in response to data packets,
1130          * so that this nexthop apparently is reachable. --ANK
1131          */
1132         dst_confirm(&rt->u.dst);
1133
1134         /* Duplicate redirect: silently ignore. */
1135         if (neigh == rt->u.dst.neighbour)
1136                 goto out;
1137
1138         nrt = ip6_rt_copy(rt);
1139         if (nrt == NULL)
1140                 goto out;
1141
1142         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1143         if (on_link)
1144                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1145
1146         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1147         nrt->rt6i_dst.plen = 128;
1148         nrt->u.dst.flags |= DST_HOST;
1149
1150         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1151         nrt->rt6i_nexthop = neigh_clone(neigh);
1152         /* Reset pmtu, it may be better */
1153         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1154         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1155
1156         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1157                 goto out;
1158
1159         if (rt->rt6i_flags&RTF_CACHE) {
1160                 ip6_del_rt(rt, NULL, NULL, NULL);
1161                 return;
1162         }
1163
1164 out:
1165         dst_release(&rt->u.dst);
1166         return;
1167 }
1168
1169 /*
1170  *      Handle ICMP "packet too big" messages
1171  *      i.e. Path MTU discovery
1172  */
1173
1174 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1175                         struct net_device *dev, u32 pmtu)
1176 {
1177         struct rt6_info *rt, *nrt;
1178         int allfrag = 0;
1179
1180         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1181         if (rt == NULL)
1182                 return;
1183
1184         if (pmtu >= dst_mtu(&rt->u.dst))
1185                 goto out;
1186
1187         if (pmtu < IPV6_MIN_MTU) {
1188                 /*
1189                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1190                  * MTU (1280) and a fragment header should always be included
1191                  * after a node receiving Too Big message reporting PMTU is
1192                  * less than the IPv6 Minimum Link MTU.
1193                  */
1194                 pmtu = IPV6_MIN_MTU;
1195                 allfrag = 1;
1196         }
1197
1198         /* New mtu received -> path was valid.
1199            They are sent only in response to data packets,
1200            so that this nexthop apparently is reachable. --ANK
1201          */
1202         dst_confirm(&rt->u.dst);
1203
1204         /* Host route. If it is static, it would be better
1205            not to override it, but add new one, so that
1206            when cache entry will expire old pmtu
1207            would return automatically.
1208          */
1209         if (rt->rt6i_flags & RTF_CACHE) {
1210                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1211                 if (allfrag)
1212                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1213                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1214                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1215                 goto out;
1216         }
1217
1218         /* Network route.
1219            Two cases are possible:
1220            1. It is connected route. Action: COW
1221            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1222          */
1223         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1224                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1225                 if (!nrt)
1226                         goto out;
1227
1228                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1229                 if (allfrag)
1230                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1231
1232                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1233                  * happened within 5 mins, the recommended timer is 10 mins.
1234                  * Here this route expiration time is set to ip6_rt_mtu_expires
1235                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1236                  * and detecting PMTU increase will be automatically happened.
1237                  */
1238                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1239                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1240
1241                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1242         } else {
1243                 nrt = ip6_rt_copy(rt);
1244                 if (nrt == NULL)
1245                         goto out;
1246                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1247                 nrt->rt6i_dst.plen = 128;
1248                 nrt->u.dst.flags |= DST_HOST;
1249                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1250                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1251                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1252                 if (nrt->rt6i_flags & RTF_REJECT)
1253                         nrt->u.dst.error = rt->u.dst.error;
1254                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1255                 if (allfrag)
1256                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1257                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1258         }
1259
1260 out:
1261         dst_release(&rt->u.dst);
1262 }
1263
1264 /*
1265  *      Misc support functions
1266  */
1267
1268 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1269 {
1270         struct rt6_info *rt = ip6_dst_alloc();
1271
1272         if (rt) {
1273                 rt->u.dst.input = ort->u.dst.input;
1274                 rt->u.dst.output = ort->u.dst.output;
1275
1276                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1277                 rt->u.dst.dev = ort->u.dst.dev;
1278                 if (rt->u.dst.dev)
1279                         dev_hold(rt->u.dst.dev);
1280                 rt->rt6i_idev = ort->rt6i_idev;
1281                 if (rt->rt6i_idev)
1282                         in6_dev_hold(rt->rt6i_idev);
1283                 rt->u.dst.lastuse = jiffies;
1284                 rt->rt6i_expires = 0;
1285
1286                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1287                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1288                 rt->rt6i_metric = 0;
1289
1290                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1291 #ifdef CONFIG_IPV6_SUBTREES
1292                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1293 #endif
1294         }
1295         return rt;
1296 }
1297
1298 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1299 {       
1300         struct rt6_info *rt;
1301         struct fib6_node *fn;
1302
1303         fn = &ip6_routing_table;
1304
1305         write_lock_bh(&rt6_lock);
1306         for (rt = fn->leaf; rt; rt=rt->u.next) {
1307                 if (dev == rt->rt6i_dev &&
1308                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1309                         break;
1310         }
1311         if (rt)
1312                 dst_hold(&rt->u.dst);
1313         write_unlock_bh(&rt6_lock);
1314         return rt;
1315 }
1316
1317 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1318                                      struct net_device *dev)
1319 {
1320         struct in6_rtmsg rtmsg;
1321
1322         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1323         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1324         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1325         rtmsg.rtmsg_metric = 1024;
1326         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1327
1328         rtmsg.rtmsg_ifindex = dev->ifindex;
1329
1330         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1331         return rt6_get_dflt_router(gwaddr, dev);
1332 }
1333
1334 void rt6_purge_dflt_routers(void)
1335 {
1336         struct rt6_info *rt;
1337
1338 restart:
1339         read_lock_bh(&rt6_lock);
1340         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1341                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1342                         dst_hold(&rt->u.dst);
1343
1344                         rt6_reset_dflt_pointer(NULL);
1345
1346                         read_unlock_bh(&rt6_lock);
1347
1348                         ip6_del_rt(rt, NULL, NULL, NULL);
1349
1350                         goto restart;
1351                 }
1352         }
1353         read_unlock_bh(&rt6_lock);
1354 }
1355
1356 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1357 {
1358         struct in6_rtmsg rtmsg;
1359         int err;
1360
1361         switch(cmd) {
1362         case SIOCADDRT:         /* Add a route */
1363         case SIOCDELRT:         /* Delete a route */
1364                 if (!capable(CAP_NET_ADMIN))
1365                         return -EPERM;
1366                 err = copy_from_user(&rtmsg, arg,
1367                                      sizeof(struct in6_rtmsg));
1368                 if (err)
1369                         return -EFAULT;
1370                         
1371                 rtnl_lock();
1372                 switch (cmd) {
1373                 case SIOCADDRT:
1374                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1375                         break;
1376                 case SIOCDELRT:
1377                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1378                         break;
1379                 default:
1380                         err = -EINVAL;
1381                 }
1382                 rtnl_unlock();
1383
1384                 return err;
1385         };
1386
1387         return -EINVAL;
1388 }
1389
1390 /*
1391  *      Drop the packet on the floor
1392  */
1393
1394 static int ip6_pkt_discard(struct sk_buff *skb)
1395 {
1396         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1397         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1398         kfree_skb(skb);
1399         return 0;
1400 }
1401
1402 static int ip6_pkt_discard_out(struct sk_buff *skb)
1403 {
1404         skb->dev = skb->dst->dev;
1405         return ip6_pkt_discard(skb);
1406 }
1407
1408 /*
1409  *      Allocate a dst for local (unicast / anycast) address.
1410  */
1411
1412 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1413                                     const struct in6_addr *addr,
1414                                     int anycast)
1415 {
1416         struct rt6_info *rt = ip6_dst_alloc();
1417
1418         if (rt == NULL)
1419                 return ERR_PTR(-ENOMEM);
1420
1421         dev_hold(&loopback_dev);
1422         in6_dev_hold(idev);
1423
1424         rt->u.dst.flags = DST_HOST;
1425         rt->u.dst.input = ip6_input;
1426         rt->u.dst.output = ip6_output;
1427         rt->rt6i_dev = &loopback_dev;
1428         rt->rt6i_idev = idev;
1429         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1430         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1431         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1432         rt->u.dst.obsolete = -1;
1433
1434         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1435         if (anycast)
1436                 rt->rt6i_flags |= RTF_ANYCAST;
1437         else
1438                 rt->rt6i_flags |= RTF_LOCAL;
1439         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1440         if (rt->rt6i_nexthop == NULL) {
1441                 dst_free((struct dst_entry *) rt);
1442                 return ERR_PTR(-ENOMEM);
1443         }
1444
1445         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1446         rt->rt6i_dst.plen = 128;
1447
1448         atomic_set(&rt->u.dst.__refcnt, 1);
1449
1450         return rt;
1451 }
1452
1453 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1454 {
1455         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1456             rt != &ip6_null_entry) {
1457                 RT6_TRACE("deleted by ifdown %p\n", rt);
1458                 return -1;
1459         }
1460         return 0;
1461 }
1462
1463 void rt6_ifdown(struct net_device *dev)
1464 {
1465         write_lock_bh(&rt6_lock);
1466         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1467         write_unlock_bh(&rt6_lock);
1468 }
1469
/* Argument bundle passed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU of that device */
};
1475
1476 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1477 {
1478         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1479         struct inet6_dev *idev;
1480
1481         /* In IPv6 pmtu discovery is not optional,
1482            so that RTAX_MTU lock cannot disable it.
1483            We still use this lock to block changes
1484            caused by addrconf/ndisc.
1485         */
1486
1487         idev = __in6_dev_get(arg->dev);
1488         if (idev == NULL)
1489                 return 0;
1490
1491         /* For administrative MTU increase, there is no way to discover
1492            IPv6 PMTU increase, so PMTU increase should be updated here.
1493            Since RFC 1981 doesn't include administrative MTU increase
1494            update PMTU increase is a MUST. (i.e. jumbo frame)
1495          */
1496         /*
1497            If new MTU is less than route PMTU, this new MTU will be the
1498            lowest MTU in the path, update the route PMTU to reflect PMTU
1499            decreases; if new MTU is greater than route PMTU, and the
1500            old MTU is the lowest MTU in the path, update the route PMTU
1501            to reflect the increase. In this case if the other nodes' MTU
1502            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1503            PMTU discouvery.
1504          */
1505         if (rt->rt6i_dev == arg->dev &&
1506             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1507             (dst_mtu(&rt->u.dst) > arg->mtu ||
1508              (dst_mtu(&rt->u.dst) < arg->mtu &&
1509               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1510                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1511         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1512         return 0;
1513 }
1514
1515 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1516 {
1517         struct rt6_mtu_change_arg arg;
1518
1519         arg.dev = dev;
1520         arg.mtu = mtu;
1521         read_lock_bh(&rt6_lock);
1522         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1523         read_unlock_bh(&rt6_lock);
1524 }
1525
1526 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1527                               struct in6_rtmsg *rtmsg)
1528 {
1529         memset(rtmsg, 0, sizeof(*rtmsg));
1530
1531         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1532         rtmsg->rtmsg_src_len = r->rtm_src_len;
1533         rtmsg->rtmsg_flags = RTF_UP;
1534         if (r->rtm_type == RTN_UNREACHABLE)
1535                 rtmsg->rtmsg_flags |= RTF_REJECT;
1536
1537         if (rta[RTA_GATEWAY-1]) {
1538                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1539                         return -EINVAL;
1540                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1541                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1542         }
1543         if (rta[RTA_DST-1]) {
1544                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1545                         return -EINVAL;
1546                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1547         }
1548         if (rta[RTA_SRC-1]) {
1549                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1550                         return -EINVAL;
1551                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1552         }
1553         if (rta[RTA_OIF-1]) {
1554                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1555                         return -EINVAL;
1556                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1557         }
1558         if (rta[RTA_PRIORITY-1]) {
1559                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1560                         return -EINVAL;
1561                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1562         }
1563         return 0;
1564 }
1565
1566 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1567 {
1568         struct rtmsg *r = NLMSG_DATA(nlh);
1569         struct in6_rtmsg rtmsg;
1570
1571         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1572                 return -EINVAL;
1573         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1574 }
1575
1576 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1577 {
1578         struct rtmsg *r = NLMSG_DATA(nlh);
1579         struct in6_rtmsg rtmsg;
1580
1581         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1582                 return -EINVAL;
1583         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1584 }
1585
/* Per-call state handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback context */
};
1591
1592 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1593                          struct in6_addr *dst, struct in6_addr *src,
1594                          int iif, int type, u32 pid, u32 seq,
1595                          int prefix, unsigned int flags)
1596 {
1597         struct rtmsg *rtm;
1598         struct nlmsghdr  *nlh;
1599         unsigned char    *b = skb->tail;
1600         struct rta_cacheinfo ci;
1601
1602         if (prefix) {   /* user wants prefix routes only */
1603                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1604                         /* success since this is not a prefix route */
1605                         return 1;
1606                 }
1607         }
1608
1609         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1610         rtm = NLMSG_DATA(nlh);
1611         rtm->rtm_family = AF_INET6;
1612         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1613         rtm->rtm_src_len = rt->rt6i_src.plen;
1614         rtm->rtm_tos = 0;
1615         rtm->rtm_table = RT_TABLE_MAIN;
1616         if (rt->rt6i_flags&RTF_REJECT)
1617                 rtm->rtm_type = RTN_UNREACHABLE;
1618         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1619                 rtm->rtm_type = RTN_LOCAL;
1620         else
1621                 rtm->rtm_type = RTN_UNICAST;
1622         rtm->rtm_flags = 0;
1623         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1624         rtm->rtm_protocol = rt->rt6i_protocol;
1625         if (rt->rt6i_flags&RTF_DYNAMIC)
1626                 rtm->rtm_protocol = RTPROT_REDIRECT;
1627         else if (rt->rt6i_flags & RTF_ADDRCONF)
1628                 rtm->rtm_protocol = RTPROT_KERNEL;
1629         else if (rt->rt6i_flags&RTF_DEFAULT)
1630                 rtm->rtm_protocol = RTPROT_RA;
1631
1632         if (rt->rt6i_flags&RTF_CACHE)
1633                 rtm->rtm_flags |= RTM_F_CLONED;
1634
1635         if (dst) {
1636                 RTA_PUT(skb, RTA_DST, 16, dst);
1637                 rtm->rtm_dst_len = 128;
1638         } else if (rtm->rtm_dst_len)
1639                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1640 #ifdef CONFIG_IPV6_SUBTREES
1641         if (src) {
1642                 RTA_PUT(skb, RTA_SRC, 16, src);
1643                 rtm->rtm_src_len = 128;
1644         } else if (rtm->rtm_src_len)
1645                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1646 #endif
1647         if (iif)
1648                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1649         else if (dst) {
1650                 struct in6_addr saddr_buf;
1651                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1652                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1653         }
1654         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1655                 goto rtattr_failure;
1656         if (rt->u.dst.neighbour)
1657                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1658         if (rt->u.dst.dev)
1659                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1660         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1661         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1662         if (rt->rt6i_expires)
1663                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1664         else
1665                 ci.rta_expires = 0;
1666         ci.rta_used = rt->u.dst.__use;
1667         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1668         ci.rta_error = rt->u.dst.error;
1669         ci.rta_id = 0;
1670         ci.rta_ts = 0;
1671         ci.rta_tsage = 0;
1672         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1673         nlh->nlmsg_len = skb->tail - b;
1674         return skb->len;
1675
1676 nlmsg_failure:
1677 rtattr_failure:
1678         skb_trim(skb, b - skb->data);
1679         return -1;
1680 }
1681
1682 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1683 {
1684         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1685         int prefix;
1686
1687         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1688                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1689                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1690         } else
1691                 prefix = 0;
1692
1693         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1694                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1695                      prefix, NLM_F_MULTI);
1696 }
1697
1698 static int fib6_dump_node(struct fib6_walker_t *w)
1699 {
1700         int res;
1701         struct rt6_info *rt;
1702
1703         for (rt = w->leaf; rt; rt = rt->u.next) {
1704                 res = rt6_dump_route(rt, w->args);
1705                 if (res < 0) {
1706                         /* Frame is full, suspend walking */
1707                         w->leaf = rt;
1708                         return 1;
1709                 }
1710                 BUG_TRAP(res!=0);
1711         }
1712         w->leaf = NULL;
1713         return 0;
1714 }
1715
1716 static void fib6_dump_end(struct netlink_callback *cb)
1717 {
1718         struct fib6_walker_t *w = (void*)cb->args[0];
1719
1720         if (w) {
1721                 cb->args[0] = 0;
1722                 fib6_walker_unlink(w);
1723                 kfree(w);
1724         }
1725         cb->done = (void*)cb->args[1];
1726         cb->args[1] = 0;
1727 }
1728
1729 static int fib6_dump_done(struct netlink_callback *cb)
1730 {
1731         fib6_dump_end(cb);
1732         return cb->done ? cb->done(cb) : 0;
1733 }
1734
1735 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1736 {
1737         struct rt6_rtnl_dump_arg arg;
1738         struct fib6_walker_t *w;
1739         int res;
1740
1741         arg.skb = skb;
1742         arg.cb = cb;
1743
1744         w = (void*)cb->args[0];
1745         if (w == NULL) {
1746                 /* New dump:
1747                  * 
1748                  * 1. hook callback destructor.
1749                  */
1750                 cb->args[1] = (long)cb->done;
1751                 cb->done = fib6_dump_done;
1752
1753                 /*
1754                  * 2. allocate and initialize walker.
1755                  */
1756                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1757                 if (w == NULL)
1758                         return -ENOMEM;
1759                 RT6_TRACE("dump<%p", w);
1760                 memset(w, 0, sizeof(*w));
1761                 w->root = &ip6_routing_table;
1762                 w->func = fib6_dump_node;
1763                 w->args = &arg;
1764                 cb->args[0] = (long)w;
1765                 read_lock_bh(&rt6_lock);
1766                 res = fib6_walk(w);
1767                 read_unlock_bh(&rt6_lock);
1768         } else {
1769                 w->args = &arg;
1770                 read_lock_bh(&rt6_lock);
1771                 res = fib6_walk_continue(w);
1772                 read_unlock_bh(&rt6_lock);
1773         }
1774 #if RT6_DEBUG >= 3
1775         if (res <= 0 && skb->len == 0)
1776                 RT6_TRACE("%p>dump end\n", w);
1777 #endif
1778         res = res < 0 ? res : skb->len;
1779         /* res < 0 is an error. (really, impossible)
1780            res == 0 means that dump is complete, but skb still can contain data.
1781            res > 0 dump is not complete, but frame is full.
1782          */
1783         /* Destroy walker, if dump of this table is complete. */
1784         if (res <= 0)
1785                 fib6_dump_end(cb);
1786         return res;
1787 }
1788
1789 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1790 {
1791         struct rtattr **rta = arg;
1792         int iif = 0;
1793         int err = -ENOBUFS;
1794         struct sk_buff *skb;
1795         struct flowi fl;
1796         struct rt6_info *rt;
1797
1798         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1799         if (skb == NULL)
1800                 goto out;
1801
1802         /* Reserve room for dummy headers, this skb can pass
1803            through good chunk of routing engine.
1804          */
1805         skb->mac.raw = skb->data;
1806         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1807
1808         memset(&fl, 0, sizeof(fl));
1809         if (rta[RTA_SRC-1])
1810                 ipv6_addr_copy(&fl.fl6_src,
1811                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1812         if (rta[RTA_DST-1])
1813                 ipv6_addr_copy(&fl.fl6_dst,
1814                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1815
1816         if (rta[RTA_IIF-1])
1817                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1818
1819         if (iif) {
1820                 struct net_device *dev;
1821                 dev = __dev_get_by_index(iif);
1822                 if (!dev) {
1823                         err = -ENODEV;
1824                         goto out_free;
1825                 }
1826         }
1827
1828         fl.oif = 0;
1829         if (rta[RTA_OIF-1])
1830                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1831
1832         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1833
1834         skb->dst = &rt->u.dst;
1835
1836         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1837         err = rt6_fill_node(skb, rt, 
1838                             &fl.fl6_dst, &fl.fl6_src,
1839                             iif,
1840                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1841                             nlh->nlmsg_seq, 0, 0);
1842         if (err < 0) {
1843                 err = -EMSGSIZE;
1844                 goto out_free;
1845         }
1846
1847         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1848         if (err > 0)
1849                 err = 0;
1850 out:
1851         return err;
1852 out_free:
1853         kfree_skb(skb);
1854         goto out;       
1855 }
1856
1857 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1858                         struct netlink_skb_parms *req)
1859 {
1860         struct sk_buff *skb;
1861         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1862         u32 pid = current->pid;
1863         u32 seq = 0;
1864
1865         if (req)
1866                 pid = req->pid;
1867         if (nlh)
1868                 seq = nlh->nlmsg_seq;
1869         
1870         skb = alloc_skb(size, gfp_any());
1871         if (!skb) {
1872                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1873                 return;
1874         }
1875         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1876                 kfree_skb(skb);
1877                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1878                 return;
1879         }
1880         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1881         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1882 }
1883
1884 /*
1885  *      /proc
1886  */
1887
1888 #ifdef CONFIG_PROC_FS
1889
/*
 * Size in bytes of one record as formatted by rt6_info_route(), used to
 * translate a byte offset into a number of whole records to skip.  It
 * matches the formats there as long as the device name needs no more than
 * the 8 columns reserved by "%8s".
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
	char *buffer;	/* where records are written */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1900
1901 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1902 {
1903         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1904         int i;
1905
1906         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1907                 arg->skip++;
1908                 return 0;
1909         }
1910
1911         if (arg->len >= arg->length)
1912                 return 0;
1913
1914         for (i=0; i<16; i++) {
1915                 sprintf(arg->buffer + arg->len, "%02x",
1916                         rt->rt6i_dst.addr.s6_addr[i]);
1917                 arg->len += 2;
1918         }
1919         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1920                             rt->rt6i_dst.plen);
1921
1922 #ifdef CONFIG_IPV6_SUBTREES
1923         for (i=0; i<16; i++) {
1924                 sprintf(arg->buffer + arg->len, "%02x",
1925                         rt->rt6i_src.addr.s6_addr[i]);
1926                 arg->len += 2;
1927         }
1928         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1929                             rt->rt6i_src.plen);
1930 #else
1931         sprintf(arg->buffer + arg->len,
1932                 "00000000000000000000000000000000 00 ");
1933         arg->len += 36;
1934 #endif
1935
1936         if (rt->rt6i_nexthop) {
1937                 for (i=0; i<16; i++) {
1938                         sprintf(arg->buffer + arg->len, "%02x",
1939                                 rt->rt6i_nexthop->primary_key[i]);
1940                         arg->len += 2;
1941                 }
1942         } else {
1943                 sprintf(arg->buffer + arg->len,
1944                         "00000000000000000000000000000000");
1945                 arg->len += 32;
1946         }
1947         arg->len += sprintf(arg->buffer + arg->len,
1948                             " %08x %08x %08x %08x %8s\n",
1949                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1950                             rt->u.dst.__use, rt->rt6i_flags, 
1951                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1952         return 0;
1953 }
1954
1955 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1956 {
1957         struct rt6_proc_arg arg;
1958         arg.buffer = buffer;
1959         arg.offset = offset;
1960         arg.length = length;
1961         arg.skip = 0;
1962         arg.len = 0;
1963
1964         read_lock_bh(&rt6_lock);
1965         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1966         read_unlock_bh(&rt6_lock);
1967
1968         *start = buffer;
1969         if (offset)
1970                 *start += offset % RT6_INFO_LEN;
1971
1972         arg.len -= offset % RT6_INFO_LEN;
1973
1974         if (arg.len > length)
1975                 arg.len = length;
1976         if (arg.len < 0)
1977                 arg.len = 0;
1978
1979         return arg.len;
1980 }
1981
1982 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1983 {
1984         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1985                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1986                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1987                       rt6_stats.fib_rt_cache,
1988                       atomic_read(&ip6_dst_ops.entries),
1989                       rt6_stats.fib_discarded_routes);
1990
1991         return 0;
1992 }
1993
1994 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1995 {
1996         return single_open(file, rt6_stats_seq_show, NULL);
1997 }
1998
1999 static struct file_operations rt6_stats_seq_fops = {
2000         .owner   = THIS_MODULE,
2001         .open    = rt6_stats_seq_open,
2002         .read    = seq_read,
2003         .llseek  = seq_lseek,
2004         .release = single_release,
2005 };
2006 #endif  /* CONFIG_PROC_FS */
2007
2008 #ifdef CONFIG_SYSCTL
2009
2010 static int flush_delay;
2011
2012 static
2013 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2014                               void __user *buffer, size_t *lenp, loff_t *ppos)
2015 {
2016         if (write) {
2017                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2018                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2019                 return 0;
2020         } else
2021                 return -EINVAL;
2022 }
2023
2024 ctl_table ipv6_route_table[] = {
2025         {
2026                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2027                 .procname       =       "flush",
2028                 .data           =       &flush_delay,
2029                 .maxlen         =       sizeof(int),
2030                 .mode           =       0200,
2031                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2032         },
2033         {
2034                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2035                 .procname       =       "gc_thresh",
2036                 .data           =       &ip6_dst_ops.gc_thresh,
2037                 .maxlen         =       sizeof(int),
2038                 .mode           =       0644,
2039                 .proc_handler   =       &proc_dointvec,
2040         },
2041         {
2042                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2043                 .procname       =       "max_size",
2044                 .data           =       &ip6_rt_max_size,
2045                 .maxlen         =       sizeof(int),
2046                 .mode           =       0644,
2047                 .proc_handler   =       &proc_dointvec,
2048         },
2049         {
2050                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2051                 .procname       =       "gc_min_interval",
2052                 .data           =       &ip6_rt_gc_min_interval,
2053                 .maxlen         =       sizeof(int),
2054                 .mode           =       0644,
2055                 .proc_handler   =       &proc_dointvec_jiffies,
2056                 .strategy       =       &sysctl_jiffies,
2057         },
2058         {
2059                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2060                 .procname       =       "gc_timeout",
2061                 .data           =       &ip6_rt_gc_timeout,
2062                 .maxlen         =       sizeof(int),
2063                 .mode           =       0644,
2064                 .proc_handler   =       &proc_dointvec_jiffies,
2065                 .strategy       =       &sysctl_jiffies,
2066         },
2067         {
2068                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2069                 .procname       =       "gc_interval",
2070                 .data           =       &ip6_rt_gc_interval,
2071                 .maxlen         =       sizeof(int),
2072                 .mode           =       0644,
2073                 .proc_handler   =       &proc_dointvec_jiffies,
2074                 .strategy       =       &sysctl_jiffies,
2075         },
2076         {
2077                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2078                 .procname       =       "gc_elasticity",
2079                 .data           =       &ip6_rt_gc_elasticity,
2080                 .maxlen         =       sizeof(int),
2081                 .mode           =       0644,
2082                 .proc_handler   =       &proc_dointvec_jiffies,
2083                 .strategy       =       &sysctl_jiffies,
2084         },
2085         {
2086                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2087                 .procname       =       "mtu_expires",
2088                 .data           =       &ip6_rt_mtu_expires,
2089                 .maxlen         =       sizeof(int),
2090                 .mode           =       0644,
2091                 .proc_handler   =       &proc_dointvec_jiffies,
2092                 .strategy       =       &sysctl_jiffies,
2093         },
2094         {
2095                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2096                 .procname       =       "min_adv_mss",
2097                 .data           =       &ip6_rt_min_advmss,
2098                 .maxlen         =       sizeof(int),
2099                 .mode           =       0644,
2100                 .proc_handler   =       &proc_dointvec_jiffies,
2101                 .strategy       =       &sysctl_jiffies,
2102         },
2103         {
2104                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2105                 .procname       =       "gc_min_interval_ms",
2106                 .data           =       &ip6_rt_gc_min_interval,
2107                 .maxlen         =       sizeof(int),
2108                 .mode           =       0644,
2109                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2110                 .strategy       =       &sysctl_ms_jiffies,
2111         },
2112         { .ctl_name = 0 }
2113 };
2114
2115 #endif
2116
2117 void __init ip6_route_init(void)
2118 {
2119         struct proc_dir_entry *p;
2120
2121         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2122                                                      sizeof(struct rt6_info),
2123                                                      0, SLAB_HWCACHE_ALIGN,
2124                                                      NULL, NULL);
2125         if (!ip6_dst_ops.kmem_cachep)
2126                 panic("cannot create ip6_dst_cache");
2127
2128         fib6_init();
2129 #ifdef  CONFIG_PROC_FS
2130         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2131         if (p)
2132                 p->owner = THIS_MODULE;
2133
2134         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2135 #endif
2136 #ifdef CONFIG_XFRM
2137         xfrm6_init();
2138 #endif
2139 }
2140
2141 void ip6_route_cleanup(void)
2142 {
2143 #ifdef CONFIG_PROC_FS
2144         proc_net_remove("ipv6_route");
2145         proc_net_remove("rt6_stats");
2146 #endif
2147 #ifdef CONFIG_XFRM
2148         xfrm6_fini();
2149 #endif
2150         rt6_ifdown(NULL);
2151         fib6_gc_cleanup();
2152         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2153 }