]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[IPV6]: ROUTE: Split up rt6_cow() for future changes.
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.
 *
 * With RT6_DEBUG >= 3 both helpers expand to printk():
 *   RDBG((fmt, args...))   - takes one fully parenthesised argument list
 *   RT6_TRACE(fmt, args...) - prefixes the message with KERN_DEBUG
 * Below that level both compile to nothing.
 */
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
75
/*
 * Tunables for the IPv6 routing cache.  Values expressed with HZ are in
 * jiffies.
 */
/* Entry count above which ip6_dst_gc() always collects and reports pressure. */
static int ip6_rt_max_size = 4096;
/* Minimum spacing between two ip6_dst_gc() runs while below the size limit. */
static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once below gc_thresh. */
static int ip6_rt_gc_timeout = 60*HZ;
/* Not static: visible to other files (users are outside this chunk). */
int ip6_rt_gc_interval = 30*HZ;
/* Shift used by ip6_dst_gc() to decay its expiry on every call. */
static int ip6_rt_gc_elasticity = 9;
/* NOTE(review): presumably the lifetime of a learned path MTU; not used in this chunk. */
static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
/*
 * Forward declarations.  ip6_rt_copy() is used by rt6_alloc_cow(); the
 * remaining functions are referenced by the ip6_dst_ops and
 * ip6_null_entry initializers below.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void             ip6_dst_destroy(struct dst_entry *);
static void             ip6_dst_ifdown(struct dst_entry *,
                                       struct net_device *dev, int how);
static int               ip6_dst_gc(void);

static int              ip6_pkt_discard(struct sk_buff *skb);
static int              ip6_pkt_discard_out(struct sk_buff *skb);
static void             ip6_link_failure(struct sk_buff *skb);
static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
/*
 * Destination-cache operations for IPv6 routes.  Every rt6_info created
 * through ip6_dst_alloc() is allocated against this table, so these
 * callbacks apply to all routes handled in this file.
 */
static struct dst_ops ip6_dst_ops = {
        .family                 =       AF_INET6,
        .protocol               =       __constant_htons(ETH_P_IPV6),
        .gc                     =       ip6_dst_gc,
        /* ip6_dst_gc() resets its expiry once the entry count is below this */
        .gc_thresh              =       1024,
        .check                  =       ip6_dst_check,
        .destroy                =       ip6_dst_destroy,
        .ifdown                 =       ip6_dst_ifdown,
        .negative_advice        =       ip6_negative_advice,
        .link_failure           =       ip6_link_failure,
        .update_pmtu            =       ip6_rt_update_pmtu,
        .entry_size             =       sizeof(struct rt6_info),
};
110
/*
 * Statically allocated "no route" entry.  Lookups in this file return it
 * when nothing usable is found (strict device match failure, no default
 * router, clone allocation failure).  It is a reject route bound to the
 * loopback device: packets are discarded and dst.error is -ENETUNREACH.
 */
struct rt6_info ip6_null_entry = {
        .u = {
                .dst = {
                        /* starts with one reference */
                        .__refcnt       = ATOMIC_INIT(1),
                        .__use          = 1,
                        .dev            = &loopback_dev,
                        .obsolete       = -1,
                        .error          = -ENETUNREACH,
                        .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
                        .input          = ip6_pkt_discard,
                        .output         = ip6_pkt_discard_out,
                        .ops            = &ip6_dst_ops,
                        /* the entry is its own path */
                        .path           = (struct dst_entry*)&ip6_null_entry,
                }
        },
        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
        /* largest possible metric */
        .rt6i_metric    = ~(u32) 0,
        .rt6i_ref       = ATOMIC_INIT(1),
};
130
/*
 * Root node of the IPv6 FIB tree.  Its leaf is ip6_null_entry, so a
 * lookup that ends at the root yields the "no route" entry.
 */
struct fib6_node ip6_routing_table = {
        .leaf           = &ip6_null_entry,
        .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};
135
/*
 * Protects the whole IPv6 FIB.  In this file lookups take it for reading
 * and ip6_ins_rt() takes it for writing, always with the _bh variants.
 * It also guards ndisc_dst_gc_list.
 */

DEFINE_RWLOCK(rt6_lock);
139
140
141 /* allocate dst with ip6_dst_ops */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 {
144         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145 }
146
147 static void ip6_dst_destroy(struct dst_entry *dst)
148 {
149         struct rt6_info *rt = (struct rt6_info *)dst;
150         struct inet6_dev *idev = rt->rt6i_idev;
151
152         if (idev != NULL) {
153                 rt->rt6i_idev = NULL;
154                 in6_dev_put(idev);
155         }       
156 }
157
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159                            int how)
160 {
161         struct rt6_info *rt = (struct rt6_info *)dst;
162         struct inet6_dev *idev = rt->rt6i_idev;
163
164         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166                 if (loopback_idev != NULL) {
167                         rt->rt6i_idev = loopback_idev;
168                         in6_dev_put(idev);
169                 }
170         }
171 }
172
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 {
175         return (rt->rt6i_flags & RTF_EXPIRES &&
176                 time_after(jiffies, rt->rt6i_expires));
177 }
178
179 /*
180  *      Route lookup. Any rt6_lock is implied.
181  */
182
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184                                                     int oif,
185                                                     int strict)
186 {
187         struct rt6_info *local = NULL;
188         struct rt6_info *sprt;
189
190         if (oif) {
191                 for (sprt = rt; sprt; sprt = sprt->u.next) {
192                         struct net_device *dev = sprt->rt6i_dev;
193                         if (dev->ifindex == oif)
194                                 return sprt;
195                         if (dev->flags & IFF_LOOPBACK) {
196                                 if (sprt->rt6i_idev == NULL ||
197                                     sprt->rt6i_idev->dev->ifindex != oif) {
198                                         if (strict && oif)
199                                                 continue;
200                                         if (local && (!oif || 
201                                                       local->rt6i_idev->dev->ifindex == oif))
202                                                 continue;
203                                 }
204                                 local = sprt;
205                         }
206                 }
207
208                 if (local)
209                         return local;
210
211                 if (strict)
212                         return &ip6_null_entry;
213         }
214         return rt;
215 }
216
/*
 *      Pointer to the last default router chosen by rt6_best_dflt().
 *      Writers hold rt6_dflt_lock; BH is disabled locally.
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 {
225         spin_lock_bh(&rt6_dflt_lock);
226         if (rt == NULL || rt == rt6_dflt_pointer) {
227                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228                 rt6_dflt_pointer = NULL;
229         }
230         spin_unlock_bh(&rt6_dflt_lock);
231 }
232
/*
 * Default Router Selection (RFC 2461 6.3.6)
 *
 * @rt:  head of the list of candidate default routes
 * @oif: requested output interface index, or 0 for "any"
 *
 * Each non-expired candidate with a next-hop neighbour is scored:
 *   +8  no @oif given, or the route's device is @oif
 *   +4  it is the router remembered in rt6_dflt_pointer
 *   +3  neighbour REACHABLE
 *   +2  neighbour STALE, DELAY or PROBE
 *   +1  neighbour NOARP or PERMANENT
 * Candidates without a neighbour, or whose neighbour is in any other
 * state, are skipped.  The highest score wins; a score of 12 or more
 * (interface match + remembered router + usable neighbour) ends the
 * scan at once.
 *
 * If nothing scores, the list is walked round-robin starting after the
 * remembered router.  The choice made so far, if any, is recorded in
 * rt6_dflt_pointer.  As a last resort the leaf list of the FIB root is
 * searched for an RTF_DEFAULT route; that one is not recorded.
 *
 * Returns the selected route, or &ip6_null_entry when none exists.  No
 * reference is taken.  The callers in this file hold rt6_lock.
 */
static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
{
        struct rt6_info *match = NULL;
        struct rt6_info *sprt;
        int mpri = 0;   /* best score seen so far */

        for (sprt = rt; sprt; sprt = sprt->u.next) {
                struct neighbour *neigh;
                int m = 0;

                if (!oif ||
                    (sprt->rt6i_dev &&
                     sprt->rt6i_dev->ifindex == oif))
                        m += 8;

                if (rt6_check_expired(sprt))
                        continue;

                if (sprt == rt6_dflt_pointer)
                        m += 4;

                if ((neigh = sprt->rt6i_nexthop) != NULL) {
                        /* nud_state is sampled under the neighbour's lock */
                        read_lock_bh(&neigh->lock);
                        switch (neigh->nud_state) {
                        case NUD_REACHABLE:
                                m += 3;
                                break;

                        case NUD_STALE:
                        case NUD_DELAY:
                        case NUD_PROBE:
                                m += 2;
                                break;

                        case NUD_NOARP:
                        case NUD_PERMANENT:
                                m += 1;
                                break;

                        case NUD_INCOMPLETE:
                        default:
                                /* not usable: unlock and try the next router */
                                read_unlock_bh(&neigh->lock);
                                continue;
                        }
                        read_unlock_bh(&neigh->lock);
                } else {
                        continue;
                }

                if (m > mpri || m >= 12) {
                        match = sprt;
                        mpri = m;
                        if (m >= 12) {
                                /* we choose the last default router if it
                                 * is in (probably) reachable state.
                                 * If route changed, we should do pmtu
                                 * discovery. --yoshfuji
                                 */
                                break;
                        }
                }
        }

        spin_lock(&rt6_dflt_lock);
        if (!match) {
                /*
                 *      No default routers are known to be reachable.
                 *      SHOULD round robin
                 */
                if (rt6_dflt_pointer) {
                        /* first the entries after the remembered router ... */
                        for (sprt = rt6_dflt_pointer->u.next;
                             sprt; sprt = sprt->u.next) {
                                if (sprt->u.dst.obsolete <= 0 &&
                                    sprt->u.dst.error == 0 &&
                                    !rt6_check_expired(sprt)) {
                                        match = sprt;
                                        break;
                                }
                        }
                        /* ... then wrap to the head, up to and including it */
                        for (sprt = rt;
                             !match && sprt;
                             sprt = sprt->u.next) {
                                if (sprt->u.dst.obsolete <= 0 &&
                                    sprt->u.dst.error == 0 &&
                                    !rt6_check_expired(sprt)) {
                                        match = sprt;
                                        break;
                                }
                                if (sprt == rt6_dflt_pointer)
                                        break;
                        }
                }
        }

        if (match) {
                if (rt6_dflt_pointer != match)
                        RT6_TRACE("changed default router: %p->%p\n",
                                  rt6_dflt_pointer, match);
                rt6_dflt_pointer = match;
        }
        spin_unlock(&rt6_dflt_lock);

        if (!match) {
                /*
                 * Last Resort: if no default routers found,
                 * use addrconf default route.
                 * We don't record this route.
                 */
                for (sprt = ip6_routing_table.leaf;
                     sprt; sprt = sprt->u.next) {
                        if (!rt6_check_expired(sprt) &&
                            (sprt->rt6i_flags & RTF_DEFAULT) &&
                            (!oif ||
                             (sprt->rt6i_dev &&
                              sprt->rt6i_dev->ifindex == oif))) {
                                match = sprt;
                                break;
                        }
                }
                if (!match) {
                        /* no default route.  give up. */
                        match = &ip6_null_entry;
                }
        }

        return match;
}
361
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363                             int oif, int strict)
364 {
365         struct fib6_node *fn;
366         struct rt6_info *rt;
367
368         read_lock_bh(&rt6_lock);
369         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370         rt = rt6_device_match(fn->leaf, oif, strict);
371         dst_hold(&rt->u.dst);
372         rt->u.dst.__use++;
373         read_unlock_bh(&rt6_lock);
374
375         rt->u.dst.lastuse = jiffies;
376         if (rt->u.dst.error == 0)
377                 return rt;
378         dst_release(&rt->u.dst);
379         return NULL;
380 }
381
382 /* ip6_ins_rt is called with FREE rt6_lock.
383    It takes new route entry, the addition fails by any reason the
384    route is freed. In any case, if caller does not hold it, it may
385    be destroyed.
386  */
387
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389                 void *_rtattr, struct netlink_skb_parms *req)
390 {
391         int err;
392
393         write_lock_bh(&rt6_lock);
394         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395         write_unlock_bh(&rt6_lock);
396
397         return err;
398 }
399
400 /* No rt6_lock! If COW failed, the function returns dead route entry
401    with dst->error set to errno value.
402  */
403
404 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405                                       struct in6_addr *saddr)
406 {
407         struct rt6_info *rt;
408
409         /*
410          *      Clone the route.
411          */
412
413         rt = ip6_rt_copy(ort);
414
415         if (rt) {
416                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417                         if (rt->rt6i_dst.plen != 128 &&
418                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419                                 rt->rt6i_flags |= RTF_ANYCAST;
420                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
421                 }
422
423                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424                 rt->rt6i_dst.plen = 128;
425                 rt->rt6i_flags |= RTF_CACHE;
426                 rt->u.dst.flags |= DST_HOST;
427
428 #ifdef CONFIG_IPV6_SUBTREES
429                 if (rt->rt6i_src.plen && saddr) {
430                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431                         rt->rt6i_src.plen = 128;
432                 }
433 #endif
434
435                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
437         }
438
439         return rt;
440 }
441
442 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
444 {
445         struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
446         int err;
447
448         if (!rt) {
449                 dst_hold(&ip6_null_entry.u.dst);
450                 return &ip6_null_entry;
451         }
452
453         dst_hold(&rt->u.dst);
454
455         err = ip6_ins_rt(rt, NULL, NULL, req);
456         if (err)
457                 rt->u.dst.error = err;
458
459         return rt;
460 }
461
/*
 * BACKTRACK() - retry a strict lookup higher up the FIB tree.
 *
 * Expands inside ip6_route_input() / ip6_route_output() and relies on
 * their locals `rt`, `fn` and `strict` and their labels `restart` and
 * `out`.  It acts only when the device match produced ip6_null_entry
 * under strict matching; it then climbs fn->parent:
 *   - on reaching a node flagged RTN_ROOT it takes a reference on rt
 *     (still ip6_null_entry) and jumps to `out`;
 *   - on reaching a node flagged RTN_RTINFO it jumps to `restart` so
 *     that node's leaf list is examined.
 * If the parent chain ends without either, execution falls through.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
       while ((fn = fn->parent) != NULL) { \
                if (fn->fn_flags & RTN_ROOT) { \
                        dst_hold(&rt->u.dst); \
                        goto out; \
                } \
                if (fn->fn_flags & RTN_RTINFO) \
                        goto restart; \
        } \
}
473
474
/*
 * Route an incoming packet: find the route for the packet's destination
 * and source address, restricted to the receiving interface, and store
 * it in skb->dst with a reference held.
 *
 * Matching is strict (the route must fit skb->dev) when the destination
 * is multicast or link-local.  A route that has no next hop and is not
 * RTF_NONEXTHOP is cloned into a host cache entry via rt6_cow(); since
 * that requires dropping rt6_lock, an -EEXIST result (someone inserted
 * the same entry meanwhile) triggers a fresh lookup, for at most three
 * attempts in total.
 */
void ip6_route_input(struct sk_buff *skb)
{
        struct fib6_node *fn;
        struct rt6_info *rt;
        int strict;
        int attempts = 3;

        strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
        read_lock_bh(&rt6_lock);

        fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
                         &skb->nh.ipv6h->saddr);

restart:
        rt = fn->leaf;

        /* cached entry: use it as is, no cloning needed */
        if ((rt->rt6i_flags & RTF_CACHE)) {
                rt = rt6_device_match(rt, skb->dev->ifindex, strict);
                BACKTRACK();
                dst_hold(&rt->u.dst);
                goto out;
        }

        rt = rt6_device_match(rt, skb->dev->ifindex, strict);
        BACKTRACK();

        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
                struct rt6_info *nrt;
                /* pin the template route while the lock is dropped */
                dst_hold(&rt->u.dst);
                read_unlock_bh(&rt6_lock);

                nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
                              &skb->nh.ipv6h->saddr,
                              &NETLINK_CB(skb));

                dst_release(&rt->u.dst);
                rt = nrt;

                /* rt6_cow() returned a held entry; the lock is not held here */
                if (rt->u.dst.error != -EEXIST || --attempts <= 0)
                        goto out2;

                /* Race condition! In the gap, when rt6_lock was
                   released someone could insert this route.  Relookup.
                */
                dst_release(&rt->u.dst);
                goto relookup;
        }
        dst_hold(&rt->u.dst);

out:
        read_unlock_bh(&rt6_lock);
out2:
        rt->u.dst.lastuse = jiffies;
        rt->u.dst.__use++;
        skb->dst = (struct dst_entry *) rt;
}
533
/*
 * Find the output route for flow @fl and return its dst entry with a
 * reference held.  @sk is not used in this function.
 *
 * Matching is strict (the route must fit fl->oif) when the destination
 * is multicast or link-local.  For RTF_DEFAULT leaves whose metric is
 * at least IP6_RT_PRIO_ADDRCONF the default router is chosen by
 * rt6_best_dflt(); other leaves go through rt6_device_match().  A route
 * that has no next hop and is not RTF_NONEXTHOP is cloned into a host
 * cache entry via rt6_cow(); an -EEXIST result from the insertion
 * triggers a fresh lookup, for at most three attempts in total.
 *
 * The returned entry may be ip6_null_entry or a clone with dst.error
 * set; callers need to check the error field.
 */
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
        struct fib6_node *fn;
        struct rt6_info *rt;
        int strict;
        int attempts = 3;

        strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
        read_lock_bh(&rt6_lock);

        fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
        rt = fn->leaf;

        /* cached entry: use it as is, no cloning needed */
        if ((rt->rt6i_flags & RTF_CACHE)) {
                rt = rt6_device_match(rt, fl->oif, strict);
                BACKTRACK();
                dst_hold(&rt->u.dst);
                goto out;
        }
        if (rt->rt6i_flags & RTF_DEFAULT) {
                if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
                        rt = rt6_best_dflt(rt, fl->oif);
        } else {
                rt = rt6_device_match(rt, fl->oif, strict);
                BACKTRACK();
        }

        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
                struct rt6_info *nrt;
                /* pin the template route while the lock is dropped */
                dst_hold(&rt->u.dst);
                read_unlock_bh(&rt6_lock);

                nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);

                dst_release(&rt->u.dst);
                rt = nrt;

                /* rt6_cow() returned a held entry; the lock is not held here */
                if (rt->u.dst.error != -EEXIST || --attempts <= 0)
                        goto out2;

                /* Race condition! In the gap, when rt6_lock was
                   released someone could insert this route.  Relookup.
                */
                dst_release(&rt->u.dst);
                goto relookup;
        }
        dst_hold(&rt->u.dst);

out:
        read_unlock_bh(&rt6_lock);
out2:
        rt->u.dst.lastuse = jiffies;
        rt->u.dst.__use++;
        return &rt->u.dst;
}
593
594
595 /*
596  *      Destination cache support functions
597  */
598
599 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
600 {
601         struct rt6_info *rt;
602
603         rt = (struct rt6_info *) dst;
604
605         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
606                 return dst;
607
608         return NULL;
609 }
610
611 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
612 {
613         struct rt6_info *rt = (struct rt6_info *) dst;
614
615         if (rt) {
616                 if (rt->rt6i_flags & RTF_CACHE)
617                         ip6_del_rt(rt, NULL, NULL, NULL);
618                 else
619                         dst_release(dst);
620         }
621         return NULL;
622 }
623
624 static void ip6_link_failure(struct sk_buff *skb)
625 {
626         struct rt6_info *rt;
627
628         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
629
630         rt = (struct rt6_info *) skb->dst;
631         if (rt) {
632                 if (rt->rt6i_flags&RTF_CACHE) {
633                         dst_set_expires(&rt->u.dst, 0);
634                         rt->rt6i_flags |= RTF_EXPIRES;
635                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
636                         rt->rt6i_node->fn_sernum = -1;
637         }
638 }
639
640 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
641 {
642         struct rt6_info *rt6 = (struct rt6_info*)dst;
643
644         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
645                 rt6->rt6i_flags |= RTF_MODIFIED;
646                 if (mtu < IPV6_MIN_MTU) {
647                         mtu = IPV6_MIN_MTU;
648                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
649                 }
650                 dst->metrics[RTAX_MTU-1] = mtu;
651         }
652 }
653
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees those no longer referenced.
 * Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
657
658 static inline unsigned int ipv6_advmss(unsigned int mtu)
659 {
660         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
661
662         if (mtu < ip6_rt_min_advmss)
663                 mtu = ip6_rt_min_advmss;
664
665         /*
666          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
667          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
668          * IPV6_MAXPLEN is also valid and means: "any MSS, 
669          * rely only on pmtu discovery"
670          */
671         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
672                 mtu = IPV6_MAXPLEN;
673         return mtu;
674 }
675
676 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
677                                   struct neighbour *neigh,
678                                   struct in6_addr *addr,
679                                   int (*output)(struct sk_buff *))
680 {
681         struct rt6_info *rt;
682         struct inet6_dev *idev = in6_dev_get(dev);
683
684         if (unlikely(idev == NULL))
685                 return NULL;
686
687         rt = ip6_dst_alloc();
688         if (unlikely(rt == NULL)) {
689                 in6_dev_put(idev);
690                 goto out;
691         }
692
693         dev_hold(dev);
694         if (neigh)
695                 neigh_hold(neigh);
696         else
697                 neigh = ndisc_get_neigh(dev, addr);
698
699         rt->rt6i_dev      = dev;
700         rt->rt6i_idev     = idev;
701         rt->rt6i_nexthop  = neigh;
702         atomic_set(&rt->u.dst.__refcnt, 1);
703         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
704         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
705         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
706         rt->u.dst.output  = output;
707
708 #if 0   /* there's no chance to use these for ndisc */
709         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
710                                 ? DST_HOST 
711                                 : 0;
712         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
713         rt->rt6i_dst.plen = 128;
714 #endif
715
716         write_lock_bh(&rt6_lock);
717         rt->u.dst.next = ndisc_dst_gc_list;
718         ndisc_dst_gc_list = &rt->u.dst;
719         write_unlock_bh(&rt6_lock);
720
721         fib6_force_start_gc();
722
723 out:
724         return (struct dst_entry *)rt;
725 }
726
727 int ndisc_dst_gc(int *more)
728 {
729         struct dst_entry *dst, *next, **pprev;
730         int freed;
731
732         next = NULL;
733         pprev = &ndisc_dst_gc_list;
734         freed = 0;
735         while ((dst = *pprev) != NULL) {
736                 if (!atomic_read(&dst->__refcnt)) {
737                         *pprev = dst->next;
738                         dst_free(dst);
739                         freed++;
740                 } else {
741                         pprev = &dst->next;
742                         (*more)++;
743                 }
744         }
745
746         return freed;
747 }
748
749 static int ip6_dst_gc(void)
750 {
751         static unsigned expire = 30*HZ;
752         static unsigned long last_gc;
753         unsigned long now = jiffies;
754
755         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
756             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
757                 goto out;
758
759         expire++;
760         fib6_run_gc(expire);
761         last_gc = now;
762         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
763                 expire = ip6_rt_gc_timeout>>1;
764
765 out:
766         expire -= expire>>ip6_rt_gc_elasticity;
767         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
768 }
769
770 /* Clean host part of a prefix. Not necessary in radix tree,
771    but results in cleaner routing tables.
772
773    Remove it only when all the things will work!
774  */
775
776 static int ipv6_get_mtu(struct net_device *dev)
777 {
778         int mtu = IPV6_MIN_MTU;
779         struct inet6_dev *idev;
780
781         idev = in6_dev_get(dev);
782         if (idev) {
783                 mtu = idev->cnf.mtu6;
784                 in6_dev_put(idev);
785         }
786         return mtu;
787 }
788
789 int ipv6_get_hoplimit(struct net_device *dev)
790 {
791         int hoplimit = ipv6_devconf.hop_limit;
792         struct inet6_dev *idev;
793
794         idev = in6_dev_get(dev);
795         if (idev) {
796                 hoplimit = idev->cnf.hop_limit;
797                 in6_dev_put(idev);
798         }
799         return hoplimit;
800 }
801
802 /*
803  *
804  */
805
806 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
807                 void *_rtattr, struct netlink_skb_parms *req)
808 {
809         int err;
810         struct rtmsg *r;
811         struct rtattr **rta;
812         struct rt6_info *rt = NULL;
813         struct net_device *dev = NULL;
814         struct inet6_dev *idev = NULL;
815         int addr_type;
816
817         rta = (struct rtattr **) _rtattr;
818
819         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
820                 return -EINVAL;
821 #ifndef CONFIG_IPV6_SUBTREES
822         if (rtmsg->rtmsg_src_len)
823                 return -EINVAL;
824 #endif
825         if (rtmsg->rtmsg_ifindex) {
826                 err = -ENODEV;
827                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
828                 if (!dev)
829                         goto out;
830                 idev = in6_dev_get(dev);
831                 if (!idev)
832                         goto out;
833         }
834
835         if (rtmsg->rtmsg_metric == 0)
836                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
837
838         rt = ip6_dst_alloc();
839
840         if (rt == NULL) {
841                 err = -ENOMEM;
842                 goto out;
843         }
844
845         rt->u.dst.obsolete = -1;
846         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
847         if (nlh && (r = NLMSG_DATA(nlh))) {
848                 rt->rt6i_protocol = r->rtm_protocol;
849         } else {
850                 rt->rt6i_protocol = RTPROT_BOOT;
851         }
852
853         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
854
855         if (addr_type & IPV6_ADDR_MULTICAST)
856                 rt->u.dst.input = ip6_mc_input;
857         else
858                 rt->u.dst.input = ip6_forward;
859
860         rt->u.dst.output = ip6_output;
861
862         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
863                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
864         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
865         if (rt->rt6i_dst.plen == 128)
866                rt->u.dst.flags = DST_HOST;
867
868 #ifdef CONFIG_IPV6_SUBTREES
869         ipv6_addr_prefix(&rt->rt6i_src.addr, 
870                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
871         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
872 #endif
873
874         rt->rt6i_metric = rtmsg->rtmsg_metric;
875
876         /* We cannot add true routes via loopback here,
877            they would result in kernel looping; promote them to reject routes
878          */
879         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
880             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
881                 /* hold loopback dev/idev if we haven't done so. */
882                 if (dev != &loopback_dev) {
883                         if (dev) {
884                                 dev_put(dev);
885                                 in6_dev_put(idev);
886                         }
887                         dev = &loopback_dev;
888                         dev_hold(dev);
889                         idev = in6_dev_get(dev);
890                         if (!idev) {
891                                 err = -ENODEV;
892                                 goto out;
893                         }
894                 }
895                 rt->u.dst.output = ip6_pkt_discard_out;
896                 rt->u.dst.input = ip6_pkt_discard;
897                 rt->u.dst.error = -ENETUNREACH;
898                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
899                 goto install_route;
900         }
901
902         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
903                 struct in6_addr *gw_addr;
904                 int gwa_type;
905
906                 gw_addr = &rtmsg->rtmsg_gateway;
907                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
908                 gwa_type = ipv6_addr_type(gw_addr);
909
910                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
911                         struct rt6_info *grt;
912
913                         /* IPv6 strictly inhibits using not link-local
914                            addresses as nexthop address.
915                            Otherwise, router will not able to send redirects.
916                            It is very good, but in some (rare!) circumstances
917                            (SIT, PtP, NBMA NOARP links) it is handy to allow
918                            some exceptions. --ANK
919                          */
920                         err = -EINVAL;
921                         if (!(gwa_type&IPV6_ADDR_UNICAST))
922                                 goto out;
923
924                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
925
926                         err = -EHOSTUNREACH;
927                         if (grt == NULL)
928                                 goto out;
929                         if (dev) {
930                                 if (dev != grt->rt6i_dev) {
931                                         dst_release(&grt->u.dst);
932                                         goto out;
933                                 }
934                         } else {
935                                 dev = grt->rt6i_dev;
936                                 idev = grt->rt6i_idev;
937                                 dev_hold(dev);
938                                 in6_dev_hold(grt->rt6i_idev);
939                         }
940                         if (!(grt->rt6i_flags&RTF_GATEWAY))
941                                 err = 0;
942                         dst_release(&grt->u.dst);
943
944                         if (err)
945                                 goto out;
946                 }
947                 err = -EINVAL;
948                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
949                         goto out;
950         }
951
952         err = -ENODEV;
953         if (dev == NULL)
954                 goto out;
955
956         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
957                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
958                 if (IS_ERR(rt->rt6i_nexthop)) {
959                         err = PTR_ERR(rt->rt6i_nexthop);
960                         rt->rt6i_nexthop = NULL;
961                         goto out;
962                 }
963         }
964
965         rt->rt6i_flags = rtmsg->rtmsg_flags;
966
967 install_route:
968         if (rta && rta[RTA_METRICS-1]) {
969                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
970                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
971
972                 while (RTA_OK(attr, attrlen)) {
973                         unsigned flavor = attr->rta_type;
974                         if (flavor) {
975                                 if (flavor > RTAX_MAX) {
976                                         err = -EINVAL;
977                                         goto out;
978                                 }
979                                 rt->u.dst.metrics[flavor-1] =
980                                         *(u32 *)RTA_DATA(attr);
981                         }
982                         attr = RTA_NEXT(attr, attrlen);
983                 }
984         }
985
986         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
987                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
988         if (!rt->u.dst.metrics[RTAX_MTU-1])
989                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
990         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
991                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
992         rt->u.dst.dev = dev;
993         rt->rt6i_idev = idev;
994         return ip6_ins_rt(rt, nlh, _rtattr, req);
995
996 out:
997         if (dev)
998                 dev_put(dev);
999         if (idev)
1000                 in6_dev_put(idev);
1001         if (rt)
1002                 dst_free((struct dst_entry *) rt);
1003         return err;
1004 }
1005
1006 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1007 {
1008         int err;
1009
1010         write_lock_bh(&rt6_lock);
1011
1012         rt6_reset_dflt_pointer(NULL);
1013
1014         err = fib6_del(rt, nlh, _rtattr, req);
1015         dst_release(&rt->u.dst);
1016
1017         write_unlock_bh(&rt6_lock);
1018
1019         return err;
1020 }
1021
1022 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1023 {
1024         struct fib6_node *fn;
1025         struct rt6_info *rt;
1026         int err = -ESRCH;
1027
1028         read_lock_bh(&rt6_lock);
1029
1030         fn = fib6_locate(&ip6_routing_table,
1031                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1032                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1033         
1034         if (fn) {
1035                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1036                         if (rtmsg->rtmsg_ifindex &&
1037                             (rt->rt6i_dev == NULL ||
1038                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1039                                 continue;
1040                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1041                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1042                                 continue;
1043                         if (rtmsg->rtmsg_metric &&
1044                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1045                                 continue;
1046                         dst_hold(&rt->u.dst);
1047                         read_unlock_bh(&rt6_lock);
1048
1049                         return ip6_del_rt(rt, nlh, _rtattr, req);
1050                 }
1051         }
1052         read_unlock_bh(&rt6_lock);
1053
1054         return err;
1055 }
1056
/*
 *      Handle redirects
 *
 *      @dest:    destination address the redirect applies to
 *      @saddr:   source address of the redirect; it must equal the gateway
 *                of the current route to @dest (or, for a default route,
 *                the gateway of some route in the root node's leaf list)
 *      @neigh:   neighbour entry for the new next hop; its primary key
 *                becomes the gateway of the new route
 *      @lladdr:  link-layer address handed to neigh_update()
 *      @on_link: non-zero when the target is on-link; the new route then
 *                has no RTF_GATEWAY flag and the neighbour is not marked
 *                as a router
 *
 *      On acceptance a host (/128) RTF_DYNAMIC|RTF_CACHE route to @dest
 *      via @neigh is inserted, and the route it was derived from is
 *      deleted if that one was itself a cache entry.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
                  struct neighbour *neigh, u8 *lladdr, int on_link)
{
        struct rt6_info *rt, *nrt;

        /* Locate old route to this destination. */
        rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);

        if (rt == NULL)
                return;

        /* From here on a reference on rt is held; every exit drops it. */
        if (neigh->dev != rt->rt6i_dev)
                goto out;

        /*
         * Current route is on-link; redirect is always invalid.
         *
         * Seems, previous statement is not true. It could
         * be node, which looks for us as on-link (f.e. proxy ndisc)
         * But then router serving it might decide, that we should
         * know truth 8)8) --ANK (980726).
         */
        if (!(rt->rt6i_flags&RTF_GATEWAY))
                goto out;

        /*
         *      RFC 2461 specifies that redirects should only be
         *      accepted if they come from the nexthop to the target.
         *      Due to the way default routers are chosen, this notion
         *      is a bit fuzzy and one might need to check all default
         *      routers.
         */
        if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
                if (rt->rt6i_flags & RTF_DEFAULT) {
                        struct rt6_info *rt1;

                        read_lock(&rt6_lock);
                        for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
                                if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
                                        /* Swap the held reference from rt to rt1. */
                                        dst_hold(&rt1->u.dst);
                                        dst_release(&rt->u.dst);
                                        read_unlock(&rt6_lock);
                                        rt = rt1;
                                        goto source_ok;
                                }
                        }
                        read_unlock(&rt6_lock);
                }
                if (net_ratelimit())
                        printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
                               "for redirect target\n");
                goto out;
        }

source_ok:

        /*
         *      We have finally decided to accept it.
         */

        neigh_update(neigh, lladdr, NUD_STALE,
                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
                     NEIGH_UPDATE_F_OVERRIDE|
                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
                                     NEIGH_UPDATE_F_ISROUTER))
                     );

        /*
         * Redirect received -> path was valid.
         * Look, redirects are sent only in response to data packets,
         * so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->u.dst);

        /* Duplicate redirect: silently ignore. */
        if (neigh == rt->u.dst.neighbour)
                goto out;

        nrt = ip6_rt_copy(rt);
        if (nrt == NULL)
                goto out;

        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
        if (on_link)
                nrt->rt6i_flags &= ~RTF_GATEWAY;

        /* The new entry is a host route for exactly @dest. */
        ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
        nrt->rt6i_dst.plen = 128;
        nrt->u.dst.flags |= DST_HOST;

        ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
        nrt->rt6i_nexthop = neigh_clone(neigh);
        /* Reset pmtu, it may be better */
        nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
        nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

        if (ip6_ins_rt(nrt, NULL, NULL, NULL))
                goto out;

        if (rt->rt6i_flags&RTF_CACHE) {
                /* ip6_del_rt() drops the reference on rt, so skip "out". */
                ip6_del_rt(rt, NULL, NULL, NULL);
                return;
        }

out:
        dst_release(&rt->u.dst);
        return;
}
1168
/*
 *      Handle ICMP "packet too big" messages
 *      i.e. Path MTU discovery
 *
 *      @daddr: destination of the path whose MTU is reported
 *      @saddr: source address used for the route lookup
 *      @dev:   device whose ifindex is used for the route lookup
 *      @pmtu:  reported path MTU
 *
 *      A report that is not smaller than the route's current MTU is
 *      ignored.  Otherwise the reduced MTU is stored on a host cache
 *      route that expires after ip6_rt_mtu_expires: the looked-up route
 *      itself if it already is a cache entry, else a new entry derived
 *      from it.
 */

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
                        struct net_device *dev, u32 pmtu)
{
        struct rt6_info *rt, *nrt;
        int allfrag = 0;

        rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
        if (rt == NULL)
                return;

        /* Only decreases are acted upon. */
        if (pmtu >= dst_mtu(&rt->u.dst))
                goto out;

        if (pmtu < IPV6_MIN_MTU) {
                /*
                 * According to RFC 2460, when a Too Big message reports a
                 * PMTU below the IPv6 Minimum Link MTU (1280), the PMTU is
                 * set to that minimum and a fragment header must be
                 * included in every packet from then on.
                 */
                pmtu = IPV6_MIN_MTU;
                allfrag = 1;
        }

        /* New mtu received -> path was valid.
           They are sent only in response to data packets,
           so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->u.dst);

        /* Host route. If it is static, it would be better
           not to override it, but add new one, so that
           when cache entry will expire old pmtu
           would return automatically.
         */
        if (rt->rt6i_flags & RTF_CACHE) {
                rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
                if (allfrag)
                        rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
                dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
                rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
                goto out;
        }

        /* Network route.
           Two cases are possible:
           1. It is connected route. Action: COW
           2. It is gatewayed route or NONEXTHOP route. Action: clone it.
         */
        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
                /* NOTE(review): nrt is dereferenced without a NULL check, so
                 * this relies on rt6_cow() never returning NULL - confirm.
                 */
                nrt = rt6_cow(rt, daddr, saddr, NULL);
                if (!nrt->u.dst.error) {
                        nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
                        if (allfrag)
                                nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
                        /* According to RFC 1981, detection of a PMTU increase
                           should not happen within 5 minutes; the recommended
                           timer is 10 minutes.  The route expiration time is
                           set to ip6_rt_mtu_expires, which is 10 minutes.
                           After that the decreased pmtu expires and detection
                           of a PMTU increase happens automatically.
                         */
                        dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
                        nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
                }
                dst_release(&nrt->u.dst);
        } else {
                nrt = ip6_rt_copy(rt);
                if (nrt == NULL)
                        goto out;
                /* Turn the copy into a host cache route for @daddr. */
                ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
                nrt->rt6i_dst.plen = 128;
                nrt->u.dst.flags |= DST_HOST;
                nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
                dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
                nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
                nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
                if (allfrag)
                        nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
                /* The insertion result is deliberately not checked here. */
                ip6_ins_rt(nrt, NULL, NULL, NULL);
        }

out:
        dst_release(&rt->u.dst);
}
1258
1259 /*
1260  *      Misc support functions
1261  */
1262
1263 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1264 {
1265         struct rt6_info *rt = ip6_dst_alloc();
1266
1267         if (rt) {
1268                 rt->u.dst.input = ort->u.dst.input;
1269                 rt->u.dst.output = ort->u.dst.output;
1270
1271                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1272                 rt->u.dst.dev = ort->u.dst.dev;
1273                 if (rt->u.dst.dev)
1274                         dev_hold(rt->u.dst.dev);
1275                 rt->rt6i_idev = ort->rt6i_idev;
1276                 if (rt->rt6i_idev)
1277                         in6_dev_hold(rt->rt6i_idev);
1278                 rt->u.dst.lastuse = jiffies;
1279                 rt->rt6i_expires = 0;
1280
1281                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1282                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1283                 rt->rt6i_metric = 0;
1284
1285                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1286 #ifdef CONFIG_IPV6_SUBTREES
1287                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1288 #endif
1289         }
1290         return rt;
1291 }
1292
1293 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1294 {       
1295         struct rt6_info *rt;
1296         struct fib6_node *fn;
1297
1298         fn = &ip6_routing_table;
1299
1300         write_lock_bh(&rt6_lock);
1301         for (rt = fn->leaf; rt; rt=rt->u.next) {
1302                 if (dev == rt->rt6i_dev &&
1303                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1304                         break;
1305         }
1306         if (rt)
1307                 dst_hold(&rt->u.dst);
1308         write_unlock_bh(&rt6_lock);
1309         return rt;
1310 }
1311
1312 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1313                                      struct net_device *dev)
1314 {
1315         struct in6_rtmsg rtmsg;
1316
1317         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1318         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1319         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1320         rtmsg.rtmsg_metric = 1024;
1321         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1322
1323         rtmsg.rtmsg_ifindex = dev->ifindex;
1324
1325         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1326         return rt6_get_dflt_router(gwaddr, dev);
1327 }
1328
1329 void rt6_purge_dflt_routers(void)
1330 {
1331         struct rt6_info *rt;
1332
1333 restart:
1334         read_lock_bh(&rt6_lock);
1335         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1336                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1337                         dst_hold(&rt->u.dst);
1338
1339                         rt6_reset_dflt_pointer(NULL);
1340
1341                         read_unlock_bh(&rt6_lock);
1342
1343                         ip6_del_rt(rt, NULL, NULL, NULL);
1344
1345                         goto restart;
1346                 }
1347         }
1348         read_unlock_bh(&rt6_lock);
1349 }
1350
1351 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1352 {
1353         struct in6_rtmsg rtmsg;
1354         int err;
1355
1356         switch(cmd) {
1357         case SIOCADDRT:         /* Add a route */
1358         case SIOCDELRT:         /* Delete a route */
1359                 if (!capable(CAP_NET_ADMIN))
1360                         return -EPERM;
1361                 err = copy_from_user(&rtmsg, arg,
1362                                      sizeof(struct in6_rtmsg));
1363                 if (err)
1364                         return -EFAULT;
1365                         
1366                 rtnl_lock();
1367                 switch (cmd) {
1368                 case SIOCADDRT:
1369                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1370                         break;
1371                 case SIOCDELRT:
1372                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1373                         break;
1374                 default:
1375                         err = -EINVAL;
1376                 }
1377                 rtnl_unlock();
1378
1379                 return err;
1380         };
1381
1382         return -EINVAL;
1383 }
1384
1385 /*
1386  *      Drop the packet on the floor
1387  */
1388
1389 static int ip6_pkt_discard(struct sk_buff *skb)
1390 {
1391         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1392         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1393         kfree_skb(skb);
1394         return 0;
1395 }
1396
1397 static int ip6_pkt_discard_out(struct sk_buff *skb)
1398 {
1399         skb->dev = skb->dst->dev;
1400         return ip6_pkt_discard(skb);
1401 }
1402
1403 /*
1404  *      Allocate a dst for local (unicast / anycast) address.
1405  */
1406
1407 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1408                                     const struct in6_addr *addr,
1409                                     int anycast)
1410 {
1411         struct rt6_info *rt = ip6_dst_alloc();
1412
1413         if (rt == NULL)
1414                 return ERR_PTR(-ENOMEM);
1415
1416         dev_hold(&loopback_dev);
1417         in6_dev_hold(idev);
1418
1419         rt->u.dst.flags = DST_HOST;
1420         rt->u.dst.input = ip6_input;
1421         rt->u.dst.output = ip6_output;
1422         rt->rt6i_dev = &loopback_dev;
1423         rt->rt6i_idev = idev;
1424         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1425         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1426         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1427         rt->u.dst.obsolete = -1;
1428
1429         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1430         if (anycast)
1431                 rt->rt6i_flags |= RTF_ANYCAST;
1432         else
1433                 rt->rt6i_flags |= RTF_LOCAL;
1434         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1435         if (rt->rt6i_nexthop == NULL) {
1436                 dst_free((struct dst_entry *) rt);
1437                 return ERR_PTR(-ENOMEM);
1438         }
1439
1440         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1441         rt->rt6i_dst.plen = 128;
1442
1443         atomic_set(&rt->u.dst.__refcnt, 1);
1444
1445         return rt;
1446 }
1447
1448 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1449 {
1450         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1451             rt != &ip6_null_entry) {
1452                 RT6_TRACE("deleted by ifdown %p\n", rt);
1453                 return -1;
1454         }
1455         return 0;
1456 }
1457
1458 void rt6_ifdown(struct net_device *dev)
1459 {
1460         write_lock_bh(&rt6_lock);
1461         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1462         write_unlock_bh(&rt6_lock);
1463 }
1464
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
        struct net_device *dev; /* device whose MTU changed */
        unsigned mtu;           /* the new MTU of that device */
};
1470
1471 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1472 {
1473         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1474         struct inet6_dev *idev;
1475
1476         /* In IPv6 pmtu discovery is not optional,
1477            so that RTAX_MTU lock cannot disable it.
1478            We still use this lock to block changes
1479            caused by addrconf/ndisc.
1480         */
1481
1482         idev = __in6_dev_get(arg->dev);
1483         if (idev == NULL)
1484                 return 0;
1485
1486         /* For administrative MTU increase, there is no way to discover
1487            IPv6 PMTU increase, so PMTU increase should be updated here.
1488            Since RFC 1981 doesn't include administrative MTU increase
1489            update PMTU increase is a MUST. (i.e. jumbo frame)
1490          */
1491         /*
1492            If new MTU is less than route PMTU, this new MTU will be the
1493            lowest MTU in the path, update the route PMTU to reflect PMTU
1494            decreases; if new MTU is greater than route PMTU, and the
1495            old MTU is the lowest MTU in the path, update the route PMTU
1496            to reflect the increase. In this case if the other nodes' MTU
1497            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1498            PMTU discouvery.
1499          */
1500         if (rt->rt6i_dev == arg->dev &&
1501             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1502             (dst_mtu(&rt->u.dst) > arg->mtu ||
1503              (dst_mtu(&rt->u.dst) < arg->mtu &&
1504               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1505                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1506         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1507         return 0;
1508 }
1509
1510 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1511 {
1512         struct rt6_mtu_change_arg arg;
1513
1514         arg.dev = dev;
1515         arg.mtu = mtu;
1516         read_lock_bh(&rt6_lock);
1517         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1518         read_unlock_bh(&rt6_lock);
1519 }
1520
1521 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1522                               struct in6_rtmsg *rtmsg)
1523 {
1524         memset(rtmsg, 0, sizeof(*rtmsg));
1525
1526         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1527         rtmsg->rtmsg_src_len = r->rtm_src_len;
1528         rtmsg->rtmsg_flags = RTF_UP;
1529         if (r->rtm_type == RTN_UNREACHABLE)
1530                 rtmsg->rtmsg_flags |= RTF_REJECT;
1531
1532         if (rta[RTA_GATEWAY-1]) {
1533                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1534                         return -EINVAL;
1535                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1536                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1537         }
1538         if (rta[RTA_DST-1]) {
1539                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1540                         return -EINVAL;
1541                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1542         }
1543         if (rta[RTA_SRC-1]) {
1544                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1545                         return -EINVAL;
1546                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1547         }
1548         if (rta[RTA_OIF-1]) {
1549                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1550                         return -EINVAL;
1551                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1552         }
1553         if (rta[RTA_PRIORITY-1]) {
1554                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1555                         return -EINVAL;
1556                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1557         }
1558         return 0;
1559 }
1560
1561 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1562 {
1563         struct rtmsg *r = NLMSG_DATA(nlh);
1564         struct in6_rtmsg rtmsg;
1565
1566         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1567                 return -EINVAL;
1568         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1569 }
1570
1571 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1572 {
1573         struct rtmsg *r = NLMSG_DATA(nlh);
1574         struct in6_rtmsg rtmsg;
1575
1576         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1577                 return -EINVAL;
1578         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1579 }
1580
/* Per-call context for a route dump, reached through the walker's args. */
struct rt6_rtnl_dump_arg
{
        struct sk_buff *skb;            /* buffer the route messages are written to */
        struct netlink_callback *cb;    /* dump callback; supplies request header, pid and seq */
};
1586
/*
 *      Append one rtnetlink route message describing @rt to @skb.
 *
 *      @dst, @src: when non-NULL, reported as /128 addresses instead of
 *                  the route's own prefixes (@src only with
 *                  CONFIG_IPV6_SUBTREES)
 *      @iif:       when non-zero, emitted as RTA_IIF; otherwise, if @dst
 *                  is given, a preferred source address is looked up and
 *                  emitted as RTA_PREFSRC
 *      @type, @pid, @seq, @flags: netlink message header values
 *      @prefix:    when non-zero, only RTF_PREFIX_RT routes are reported
 *
 *      Returns 1 without writing anything if the route is filtered out by
 *      @prefix, skb->len on success, or -1 after trimming @skb back to
 *      its original length on failure.
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 pid, u32 seq,
                         int prefix, unsigned int flags)
{
        struct rtmsg *rtm;
        struct nlmsghdr  *nlh;
        unsigned char    *b = skb->tail;        /* start of this message, for length and rollback */
        struct rta_cacheinfo ci;

        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
                        /* success since this is not a prefix route */
                        return 1;
                }
        }

        /* NOTE(review): NLMSG_NEW and RTA_PUT presumably jump to
         * nlmsg_failure / rtattr_failure when the skb has no room - confirm
         * against the macro definitions.
         */
        nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
        rtm = NLMSG_DATA(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
        rtm->rtm_table = RT_TABLE_MAIN;
        /* Route type: reject routes first, then loopback-bound, else unicast. */
        if (rt->rt6i_flags&RTF_REJECT)
                rtm->rtm_type = RTN_UNREACHABLE;
        else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
                rtm->rtm_type = RTN_UNICAST;
        rtm->rtm_flags = 0;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* The stored protocol is overridden by the first matching flag. */
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags&RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
        else if (rt->rt6i_flags & RTF_ADDRCONF)
                rtm->rtm_protocol = RTPROT_KERNEL;
        else if (rt->rt6i_flags&RTF_DEFAULT)
                rtm->rtm_protocol = RTPROT_RA;

        if (rt->rt6i_flags&RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;

        if (dst) {
                RTA_PUT(skb, RTA_DST, 16, dst);
                rtm->rtm_dst_len = 128;
        } else if (rtm->rtm_dst_len)
                RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
        if (src) {
                RTA_PUT(skb, RTA_SRC, 16, src);
                rtm->rtm_src_len = 128;
        } else if (rtm->rtm_src_len)
                RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
        if (iif)
                RTA_PUT(skb, RTA_IIF, 4, &iif);
        else if (dst) {
                struct in6_addr saddr_buf;
                if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
                        RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
        }
        if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
                goto rtattr_failure;
        if (rt->u.dst.neighbour)
                RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
        if (rt->u.dst.dev)
                RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
        RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
        /* Cache info: times are converted from jiffies to clock_t units. */
        ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
        if (rt->rt6i_expires)
                ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
        else
                ci.rta_expires = 0;
        ci.rta_used = rt->u.dst.__use;
        ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
        ci.rta_error = rt->u.dst.error;
        ci.rta_id = 0;
        ci.rta_ts = 0;
        ci.rta_tsage = 0;
        RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
        /* Final message length covers the header plus all attributes. */
        nlh->nlmsg_len = skb->tail - b;
        return skb->len;

nlmsg_failure:
rtattr_failure:
        /* Roll the skb back to where this message started. */
        skb_trim(skb, b - skb->data);
        return -1;
}
1676
1677 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1678 {
1679         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1680         int prefix;
1681
1682         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1683                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1684                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1685         } else
1686                 prefix = 0;
1687
1688         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1689                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1690                      prefix, NLM_F_MULTI);
1691 }
1692
1693 static int fib6_dump_node(struct fib6_walker_t *w)
1694 {
1695         int res;
1696         struct rt6_info *rt;
1697
1698         for (rt = w->leaf; rt; rt = rt->u.next) {
1699                 res = rt6_dump_route(rt, w->args);
1700                 if (res < 0) {
1701                         /* Frame is full, suspend walking */
1702                         w->leaf = rt;
1703                         return 1;
1704                 }
1705                 BUG_TRAP(res!=0);
1706         }
1707         w->leaf = NULL;
1708         return 0;
1709 }
1710
1711 static void fib6_dump_end(struct netlink_callback *cb)
1712 {
1713         struct fib6_walker_t *w = (void*)cb->args[0];
1714
1715         if (w) {
1716                 cb->args[0] = 0;
1717                 fib6_walker_unlink(w);
1718                 kfree(w);
1719         }
1720         cb->done = (void*)cb->args[1];
1721         cb->args[1] = 0;
1722 }
1723
1724 static int fib6_dump_done(struct netlink_callback *cb)
1725 {
1726         fib6_dump_end(cb);
1727         return cb->done ? cb->done(cb) : 0;
1728 }
1729
1730 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1731 {
1732         struct rt6_rtnl_dump_arg arg;
1733         struct fib6_walker_t *w;
1734         int res;
1735
1736         arg.skb = skb;
1737         arg.cb = cb;
1738
1739         w = (void*)cb->args[0];
1740         if (w == NULL) {
1741                 /* New dump:
1742                  * 
1743                  * 1. hook callback destructor.
1744                  */
1745                 cb->args[1] = (long)cb->done;
1746                 cb->done = fib6_dump_done;
1747
1748                 /*
1749                  * 2. allocate and initialize walker.
1750                  */
1751                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1752                 if (w == NULL)
1753                         return -ENOMEM;
1754                 RT6_TRACE("dump<%p", w);
1755                 memset(w, 0, sizeof(*w));
1756                 w->root = &ip6_routing_table;
1757                 w->func = fib6_dump_node;
1758                 w->args = &arg;
1759                 cb->args[0] = (long)w;
1760                 read_lock_bh(&rt6_lock);
1761                 res = fib6_walk(w);
1762                 read_unlock_bh(&rt6_lock);
1763         } else {
1764                 w->args = &arg;
1765                 read_lock_bh(&rt6_lock);
1766                 res = fib6_walk_continue(w);
1767                 read_unlock_bh(&rt6_lock);
1768         }
1769 #if RT6_DEBUG >= 3
1770         if (res <= 0 && skb->len == 0)
1771                 RT6_TRACE("%p>dump end\n", w);
1772 #endif
1773         res = res < 0 ? res : skb->len;
1774         /* res < 0 is an error. (really, impossible)
1775            res == 0 means that dump is complete, but skb still can contain data.
1776            res > 0 dump is not complete, but frame is full.
1777          */
1778         /* Destroy walker, if dump of this table is complete. */
1779         if (res <= 0)
1780                 fib6_dump_end(cb);
1781         return res;
1782 }
1783
1784 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1785 {
1786         struct rtattr **rta = arg;
1787         int iif = 0;
1788         int err = -ENOBUFS;
1789         struct sk_buff *skb;
1790         struct flowi fl;
1791         struct rt6_info *rt;
1792
1793         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1794         if (skb == NULL)
1795                 goto out;
1796
1797         /* Reserve room for dummy headers, this skb can pass
1798            through good chunk of routing engine.
1799          */
1800         skb->mac.raw = skb->data;
1801         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1802
1803         memset(&fl, 0, sizeof(fl));
1804         if (rta[RTA_SRC-1])
1805                 ipv6_addr_copy(&fl.fl6_src,
1806                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1807         if (rta[RTA_DST-1])
1808                 ipv6_addr_copy(&fl.fl6_dst,
1809                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1810
1811         if (rta[RTA_IIF-1])
1812                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1813
1814         if (iif) {
1815                 struct net_device *dev;
1816                 dev = __dev_get_by_index(iif);
1817                 if (!dev) {
1818                         err = -ENODEV;
1819                         goto out_free;
1820                 }
1821         }
1822
1823         fl.oif = 0;
1824         if (rta[RTA_OIF-1])
1825                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1826
1827         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1828
1829         skb->dst = &rt->u.dst;
1830
1831         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1832         err = rt6_fill_node(skb, rt, 
1833                             &fl.fl6_dst, &fl.fl6_src,
1834                             iif,
1835                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1836                             nlh->nlmsg_seq, 0, 0);
1837         if (err < 0) {
1838                 err = -EMSGSIZE;
1839                 goto out_free;
1840         }
1841
1842         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1843         if (err > 0)
1844                 err = 0;
1845 out:
1846         return err;
1847 out_free:
1848         kfree_skb(skb);
1849         goto out;       
1850 }
1851
1852 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1853                         struct netlink_skb_parms *req)
1854 {
1855         struct sk_buff *skb;
1856         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1857         u32 pid = current->pid;
1858         u32 seq = 0;
1859
1860         if (req)
1861                 pid = req->pid;
1862         if (nlh)
1863                 seq = nlh->nlmsg_seq;
1864         
1865         skb = alloc_skb(size, gfp_any());
1866         if (!skb) {
1867                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1868                 return;
1869         }
1870         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1871                 kfree_skb(skb);
1872                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1873                 return;
1874         }
1875         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1876         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1877 }
1878
1879 /*
1880  *      /proc
1881  */
1882
1883 #ifdef CONFIG_PROC_FS
1884
/*
 * Size in bytes (150) that the /proc reader assumes for one route record
 * when translating a byte offset into a number of records to skip.
 * NOTE(review): rt6_info_route() pads the device name with "%8s", so a
 * record is presumably longer when the name exceeds 8 characters - confirm.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer the records are printed into */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* records skipped so far to reach the offset */
	int len;	/* bytes written to buffer so far */
};
1895
1896 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1897 {
1898         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1899         int i;
1900
1901         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1902                 arg->skip++;
1903                 return 0;
1904         }
1905
1906         if (arg->len >= arg->length)
1907                 return 0;
1908
1909         for (i=0; i<16; i++) {
1910                 sprintf(arg->buffer + arg->len, "%02x",
1911                         rt->rt6i_dst.addr.s6_addr[i]);
1912                 arg->len += 2;
1913         }
1914         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1915                             rt->rt6i_dst.plen);
1916
1917 #ifdef CONFIG_IPV6_SUBTREES
1918         for (i=0; i<16; i++) {
1919                 sprintf(arg->buffer + arg->len, "%02x",
1920                         rt->rt6i_src.addr.s6_addr[i]);
1921                 arg->len += 2;
1922         }
1923         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1924                             rt->rt6i_src.plen);
1925 #else
1926         sprintf(arg->buffer + arg->len,
1927                 "00000000000000000000000000000000 00 ");
1928         arg->len += 36;
1929 #endif
1930
1931         if (rt->rt6i_nexthop) {
1932                 for (i=0; i<16; i++) {
1933                         sprintf(arg->buffer + arg->len, "%02x",
1934                                 rt->rt6i_nexthop->primary_key[i]);
1935                         arg->len += 2;
1936                 }
1937         } else {
1938                 sprintf(arg->buffer + arg->len,
1939                         "00000000000000000000000000000000");
1940                 arg->len += 32;
1941         }
1942         arg->len += sprintf(arg->buffer + arg->len,
1943                             " %08x %08x %08x %08x %8s\n",
1944                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1945                             rt->u.dst.__use, rt->rt6i_flags, 
1946                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1947         return 0;
1948 }
1949
1950 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1951 {
1952         struct rt6_proc_arg arg;
1953         arg.buffer = buffer;
1954         arg.offset = offset;
1955         arg.length = length;
1956         arg.skip = 0;
1957         arg.len = 0;
1958
1959         read_lock_bh(&rt6_lock);
1960         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1961         read_unlock_bh(&rt6_lock);
1962
1963         *start = buffer;
1964         if (offset)
1965                 *start += offset % RT6_INFO_LEN;
1966
1967         arg.len -= offset % RT6_INFO_LEN;
1968
1969         if (arg.len > length)
1970                 arg.len = length;
1971         if (arg.len < 0)
1972                 arg.len = 0;
1973
1974         return arg.len;
1975 }
1976
1977 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1978 {
1979         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1980                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1981                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1982                       rt6_stats.fib_rt_cache,
1983                       atomic_read(&ip6_dst_ops.entries),
1984                       rt6_stats.fib_discarded_routes);
1985
1986         return 0;
1987 }
1988
1989 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1990 {
1991         return single_open(file, rt6_stats_seq_show, NULL);
1992 }
1993
1994 static struct file_operations rt6_stats_seq_fops = {
1995         .owner   = THIS_MODULE,
1996         .open    = rt6_stats_seq_open,
1997         .read    = seq_read,
1998         .llseek  = seq_lseek,
1999         .release = single_release,
2000 };
2001 #endif  /* CONFIG_PROC_FS */
2002
2003 #ifdef CONFIG_SYSCTL
2004
2005 static int flush_delay;
2006
2007 static
2008 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2009                               void __user *buffer, size_t *lenp, loff_t *ppos)
2010 {
2011         if (write) {
2012                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2013                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2014                 return 0;
2015         } else
2016                 return -EINVAL;
2017 }
2018
2019 ctl_table ipv6_route_table[] = {
2020         {
2021                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2022                 .procname       =       "flush",
2023                 .data           =       &flush_delay,
2024                 .maxlen         =       sizeof(int),
2025                 .mode           =       0200,
2026                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2027         },
2028         {
2029                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2030                 .procname       =       "gc_thresh",
2031                 .data           =       &ip6_dst_ops.gc_thresh,
2032                 .maxlen         =       sizeof(int),
2033                 .mode           =       0644,
2034                 .proc_handler   =       &proc_dointvec,
2035         },
2036         {
2037                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2038                 .procname       =       "max_size",
2039                 .data           =       &ip6_rt_max_size,
2040                 .maxlen         =       sizeof(int),
2041                 .mode           =       0644,
2042                 .proc_handler   =       &proc_dointvec,
2043         },
2044         {
2045                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2046                 .procname       =       "gc_min_interval",
2047                 .data           =       &ip6_rt_gc_min_interval,
2048                 .maxlen         =       sizeof(int),
2049                 .mode           =       0644,
2050                 .proc_handler   =       &proc_dointvec_jiffies,
2051                 .strategy       =       &sysctl_jiffies,
2052         },
2053         {
2054                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2055                 .procname       =       "gc_timeout",
2056                 .data           =       &ip6_rt_gc_timeout,
2057                 .maxlen         =       sizeof(int),
2058                 .mode           =       0644,
2059                 .proc_handler   =       &proc_dointvec_jiffies,
2060                 .strategy       =       &sysctl_jiffies,
2061         },
2062         {
2063                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2064                 .procname       =       "gc_interval",
2065                 .data           =       &ip6_rt_gc_interval,
2066                 .maxlen         =       sizeof(int),
2067                 .mode           =       0644,
2068                 .proc_handler   =       &proc_dointvec_jiffies,
2069                 .strategy       =       &sysctl_jiffies,
2070         },
2071         {
2072                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2073                 .procname       =       "gc_elasticity",
2074                 .data           =       &ip6_rt_gc_elasticity,
2075                 .maxlen         =       sizeof(int),
2076                 .mode           =       0644,
2077                 .proc_handler   =       &proc_dointvec_jiffies,
2078                 .strategy       =       &sysctl_jiffies,
2079         },
2080         {
2081                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2082                 .procname       =       "mtu_expires",
2083                 .data           =       &ip6_rt_mtu_expires,
2084                 .maxlen         =       sizeof(int),
2085                 .mode           =       0644,
2086                 .proc_handler   =       &proc_dointvec_jiffies,
2087                 .strategy       =       &sysctl_jiffies,
2088         },
2089         {
2090                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2091                 .procname       =       "min_adv_mss",
2092                 .data           =       &ip6_rt_min_advmss,
2093                 .maxlen         =       sizeof(int),
2094                 .mode           =       0644,
2095                 .proc_handler   =       &proc_dointvec_jiffies,
2096                 .strategy       =       &sysctl_jiffies,
2097         },
2098         {
2099                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2100                 .procname       =       "gc_min_interval_ms",
2101                 .data           =       &ip6_rt_gc_min_interval,
2102                 .maxlen         =       sizeof(int),
2103                 .mode           =       0644,
2104                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2105                 .strategy       =       &sysctl_ms_jiffies,
2106         },
2107         { .ctl_name = 0 }
2108 };
2109
2110 #endif
2111
2112 void __init ip6_route_init(void)
2113 {
2114         struct proc_dir_entry *p;
2115
2116         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2117                                                      sizeof(struct rt6_info),
2118                                                      0, SLAB_HWCACHE_ALIGN,
2119                                                      NULL, NULL);
2120         if (!ip6_dst_ops.kmem_cachep)
2121                 panic("cannot create ip6_dst_cache");
2122
2123         fib6_init();
2124 #ifdef  CONFIG_PROC_FS
2125         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2126         if (p)
2127                 p->owner = THIS_MODULE;
2128
2129         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2130 #endif
2131 #ifdef CONFIG_XFRM
2132         xfrm6_init();
2133 #endif
2134 }
2135
2136 void ip6_route_cleanup(void)
2137 {
2138 #ifdef CONFIG_PROC_FS
2139         proc_net_remove("ipv6_route");
2140         proc_net_remove("rt6_stats");
2141 #endif
2142 #ifdef CONFIG_XFRM
2143         xfrm6_fini();
2144 #endif
2145         rt6_ifdown(NULL);
2146         fib6_gc_cleanup();
2147         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2148 }