bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[IPV6] ROUTE: Try to use router which is not known unreachable.
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59
60 #include <asm/uaccess.h>
61
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
/*
 * Debug verbosity for this file.  A value of 3 or more turns RDBG() and
 * RT6_TRACE() into printk() calls; any lower value compiles them out.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the policy lookup paths clone routes that already have a
 * next hop (or carry RTF_NONEXTHOP) with rt6_alloc_clone(); when zero such
 * routes are handed back to the caller unchanged.
 */
#define CLONE_OFFLINK_ROUTE 0
78
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void             ip6_dst_destroy(struct dst_entry *);
91 static void             ip6_dst_ifdown(struct dst_entry *,
92                                        struct net_device *dev, int how);
93 static int               ip6_dst_gc(void);
94
95 static int              ip6_pkt_discard(struct sk_buff *skb);
96 static int              ip6_pkt_discard_out(struct sk_buff *skb);
97 static void             ip6_link_failure(struct sk_buff *skb);
98 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102                                            struct in6_addr *gwaddr, int ifindex,
103                                            unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105                                            struct in6_addr *gwaddr, int ifindex);
106 #endif
107
108 static struct dst_ops ip6_dst_ops = {
109         .family                 =       AF_INET6,
110         .protocol               =       __constant_htons(ETH_P_IPV6),
111         .gc                     =       ip6_dst_gc,
112         .gc_thresh              =       1024,
113         .check                  =       ip6_dst_check,
114         .destroy                =       ip6_dst_destroy,
115         .ifdown                 =       ip6_dst_ifdown,
116         .negative_advice        =       ip6_negative_advice,
117         .link_failure           =       ip6_link_failure,
118         .update_pmtu            =       ip6_rt_update_pmtu,
119         .entry_size             =       sizeof(struct rt6_info),
120 };
121
122 struct rt6_info ip6_null_entry = {
123         .u = {
124                 .dst = {
125                         .__refcnt       = ATOMIC_INIT(1),
126                         .__use          = 1,
127                         .dev            = &loopback_dev,
128                         .obsolete       = -1,
129                         .error          = -ENETUNREACH,
130                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
131                         .input          = ip6_pkt_discard,
132                         .output         = ip6_pkt_discard_out,
133                         .ops            = &ip6_dst_ops,
134                         .path           = (struct dst_entry*)&ip6_null_entry,
135                 }
136         },
137         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
138         .rt6i_metric    = ~(u32) 0,
139         .rt6i_ref       = ATOMIC_INIT(1),
140 };
141
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Handlers for the two extra reject routes below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/* Static reject route reporting -EACCES. */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry *)&ip6_prohibit_entry,
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.error		= -EACCES,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/* Static reject route reporting -EINVAL; same handler for input and output. */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry *)&ip6_blk_hole_entry,
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.error		= -EINVAL,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
189
190 /* allocate dst with ip6_dst_ops */
191 static __inline__ struct rt6_info *ip6_dst_alloc(void)
192 {
193         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194 }
195
196 static void ip6_dst_destroy(struct dst_entry *dst)
197 {
198         struct rt6_info *rt = (struct rt6_info *)dst;
199         struct inet6_dev *idev = rt->rt6i_idev;
200
201         if (idev != NULL) {
202                 rt->rt6i_idev = NULL;
203                 in6_dev_put(idev);
204         }       
205 }
206
207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208                            int how)
209 {
210         struct rt6_info *rt = (struct rt6_info *)dst;
211         struct inet6_dev *idev = rt->rt6i_idev;
212
213         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215                 if (loopback_idev != NULL) {
216                         rt->rt6i_idev = loopback_idev;
217                         in6_dev_put(idev);
218                 }
219         }
220 }
221
222 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223 {
224         return (rt->rt6i_flags & RTF_EXPIRES &&
225                 time_after(jiffies, rt->rt6i_expires));
226 }
227
228 static inline int rt6_need_strict(struct in6_addr *daddr)
229 {
230         return (ipv6_addr_type(daddr) &
231                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 }
233
234 /*
235  *      Route lookup. Any table->tb6_lock is implied.
236  */
237
238 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239                                                     int oif,
240                                                     int strict)
241 {
242         struct rt6_info *local = NULL;
243         struct rt6_info *sprt;
244
245         if (oif) {
246                 for (sprt = rt; sprt; sprt = sprt->u.next) {
247                         struct net_device *dev = sprt->rt6i_dev;
248                         if (dev->ifindex == oif)
249                                 return sprt;
250                         if (dev->flags & IFF_LOOPBACK) {
251                                 if (sprt->rt6i_idev == NULL ||
252                                     sprt->rt6i_idev->dev->ifindex != oif) {
253                                         if (strict && oif)
254                                                 continue;
255                                         if (local && (!oif || 
256                                                       local->rt6i_idev->dev->ifindex == oif))
257                                                 continue;
258                                 }
259                                 local = sprt;
260                         }
261                 }
262
263                 if (local)
264                         return local;
265
266                 if (strict)
267                         return &ip6_null_entry;
268         }
269         return rt;
270 }
271
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation to the next
 * hop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * Probes are rate-limited per neighbour: one is sent only when more than
 * cnf.rtr_probe_interval jiffies have passed since neigh->updated, and
 * neigh->updated is refreshed before sending.  A NULL @rt or a route
 * without a next hop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr *target;
	struct in6_addr mcaddr;

	if (!rt)
		return;

	neigh = rt->rt6i_nexthop;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check state and rate limit with the neighbour lock held. */
	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	/* Solicit the next hop via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
307
308 /*
309  * Default Router Selection (RFC 2461 6.3.6)
310  */
311 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
312 {
313         struct net_device *dev = rt->rt6i_dev;
314         if (!oif || dev->ifindex == oif)
315                 return 2;
316         if ((dev->flags & IFF_LOOPBACK) &&
317             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
318                 return 1;
319         return 0;
320 }
321
322 static int inline rt6_check_neigh(struct rt6_info *rt)
323 {
324         struct neighbour *neigh = rt->rt6i_nexthop;
325         int m = 0;
326         if (rt->rt6i_flags & RTF_NONEXTHOP ||
327             !(rt->rt6i_flags & RTF_GATEWAY))
328                 m = 1;
329         else if (neigh) {
330                 read_lock_bh(&neigh->lock);
331                 if (neigh->nud_state & NUD_VALID)
332                         m = 2;
333                 else if (!(neigh->nud_state & NUD_FAILED))
334                         m = 1;
335                 read_unlock_bh(&neigh->lock);
336         }
337         return m;
338 }
339
340 static int rt6_score_route(struct rt6_info *rt, int oif,
341                            int strict)
342 {
343         int m, n;
344                 
345         m = rt6_check_dev(rt, oif);
346         if (!m && (strict & RT6_LOOKUP_F_IFACE))
347                 return -1;
348 #ifdef CONFIG_IPV6_ROUTER_PREF
349         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
350 #endif
351         n = rt6_check_neigh(rt);
352         if (n > 1)
353                 m |= 16;
354         else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
355                 return -1;
356         return m;
357 }
358
359 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
360                                    int strict)
361 {
362         struct rt6_info *match = NULL, *last = NULL;
363         struct rt6_info *rt, *rt0 = *head;
364         u32 metric;
365         int mpri = -1;
366
367         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
368                   __FUNCTION__, head, head ? *head : NULL, oif);
369
370         for (rt = rt0, metric = rt0->rt6i_metric;
371              rt && rt->rt6i_metric == metric && (!last || rt != rt0);
372              rt = rt->u.next) {
373                 int m;
374
375                 if (rt6_check_expired(rt))
376                         continue;
377
378                 last = rt;
379
380                 m = rt6_score_route(rt, oif, strict);
381                 if (m < 0)
382                         continue;
383
384                 if (m > mpri) {
385                         rt6_probe(match);
386                         match = rt;
387                         mpri = m;
388                 } else {
389                         rt6_probe(rt);
390                 }
391         }
392
393         if (!match &&
394             (strict & RT6_LOOKUP_F_REACHABLE) &&
395             last && last != rt0) {
396                 /* no entries matched; do round-robin */
397                 static DEFINE_SPINLOCK(lock);
398                 spin_lock(&lock);
399                 *head = rt0->u.next;
400                 rt0->u.next = last->u.next;
401                 last->u.next = rt0;
402                 spin_unlock(&lock);
403         }
404
405         RT6_TRACE("%s() => %p, score=%d\n",
406                   __FUNCTION__, match, mpri);
407
408         return (match ? match : &ip6_null_entry);
409 }
410
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a received route information option.
 *
 * @dev:    device the option arrived on (its ifindex keys the route)
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as gateway
 *
 * The option is validated, then the matching route-info route is looked
 * up.  A zero lifetime deletes an existing route; a non-zero lifetime
 * adds the route if it is missing, or otherwise refreshes its preference
 * flags.  The expiry is then set from the lifetime, with 0xffffffff
 * meaning "never expires".
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * Reject negative lengths explicitly: compared against sizeof()
	 * they would be converted to a huge unsigned value and pass.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime comes off the wire: network to host byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws a previously learned route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* NOTE(review): presumably drops the reference taken by the get/add helper - confirm. */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
488
/*
 * BACKTRACK(saddr) - climb the fib6 tree when the current node gave no route.
 *
 * Must be expanded inside a function that has local variables "rt"
 * (struct rt6_info *) and "fn" (struct fib6_node *) and the labels
 * "restart" and "out".
 *
 * If rt is &ip6_null_entry the macro walks towards the root.  At each
 * step it either descends into the parent's source-address subtree
 * (looked up with @saddr) when there is one that fn did not come from, or
 * moves to the parent itself.  It jumps to "restart" as soon as it
 * reaches a node carrying route information (RTN_RTINFO) and to "out"
 * once the top-level root is hit.  If rt is anything else it does
 * nothing.
 *
 * The subtree is read through FIB6_SUBTREE() for both the test and the
 * lookup, so the macro never touches the subtree field directly.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
506
507 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
508                                              struct flowi *fl, int flags)
509 {
510         struct fib6_node *fn;
511         struct rt6_info *rt;
512
513         read_lock_bh(&table->tb6_lock);
514         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
515 restart:
516         rt = fn->leaf;
517         rt = rt6_device_match(rt, fl->oif, flags);
518         BACKTRACK(&fl->fl6_src);
519 out:
520         dst_hold(&rt->u.dst);
521         read_unlock_bh(&table->tb6_lock);
522
523         rt->u.dst.lastuse = jiffies;
524         rt->u.dst.__use++;
525
526         return rt;
527
528 }
529
530 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
531                             int oif, int strict)
532 {
533         struct flowi fl = {
534                 .oif = oif,
535                 .nl_u = {
536                         .ip6_u = {
537                                 .daddr = *daddr,
538                         },
539                 },
540         };
541         struct dst_entry *dst;
542         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
543
544         if (saddr) {
545                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
546                 flags |= RT6_LOOKUP_F_HAS_SADDR;
547         }
548
549         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
550         if (dst->error == 0)
551                 return (struct rt6_info *) dst;
552
553         dst_release(dst);
554
555         return NULL;
556 }
557
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
563
564 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
565 {
566         int err;
567         struct fib6_table *table;
568
569         table = rt->rt6i_table;
570         write_lock_bh(&table->tb6_lock);
571         err = fib6_add(&table->tb6_root, rt, info);
572         write_unlock_bh(&table->tb6_lock);
573
574         return err;
575 }
576
577 int ip6_ins_rt(struct rt6_info *rt)
578 {
579         return __ip6_ins_rt(rt, NULL);
580 }
581
582 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
583                                       struct in6_addr *saddr)
584 {
585         struct rt6_info *rt;
586
587         /*
588          *      Clone the route.
589          */
590
591         rt = ip6_rt_copy(ort);
592
593         if (rt) {
594                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
595                         if (rt->rt6i_dst.plen != 128 &&
596                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
597                                 rt->rt6i_flags |= RTF_ANYCAST;
598                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
599                 }
600
601                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
602                 rt->rt6i_dst.plen = 128;
603                 rt->rt6i_flags |= RTF_CACHE;
604                 rt->u.dst.flags |= DST_HOST;
605
606 #ifdef CONFIG_IPV6_SUBTREES
607                 if (rt->rt6i_src.plen && saddr) {
608                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
609                         rt->rt6i_src.plen = 128;
610                 }
611 #endif
612
613                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
614
615         }
616
617         return rt;
618 }
619
620 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
621 {
622         struct rt6_info *rt = ip6_rt_copy(ort);
623         if (rt) {
624                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
625                 rt->rt6i_dst.plen = 128;
626                 rt->rt6i_flags |= RTF_CACHE;
627                 rt->u.dst.flags |= DST_HOST;
628                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
629         }
630         return rt;
631 }
632
633 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
634                                             struct flowi *fl, int flags)
635 {
636         struct fib6_node *fn;
637         struct rt6_info *rt, *nrt;
638         int strict = 0;
639         int attempts = 3;
640         int err;
641         int reachable = RT6_LOOKUP_F_REACHABLE;
642
643         strict |= flags & RT6_LOOKUP_F_IFACE;
644
645 relookup:
646         read_lock_bh(&table->tb6_lock);
647
648 restart_2:
649         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
650
651 restart:
652         rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
653         BACKTRACK(&fl->fl6_src);
654         if (rt == &ip6_null_entry ||
655             rt->rt6i_flags & RTF_CACHE)
656                 goto out;
657
658         dst_hold(&rt->u.dst);
659         read_unlock_bh(&table->tb6_lock);
660
661         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
662                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
663         else {
664 #if CLONE_OFFLINK_ROUTE
665                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
666 #else
667                 goto out2;
668 #endif
669         }
670
671         dst_release(&rt->u.dst);
672         rt = nrt ? : &ip6_null_entry;
673
674         dst_hold(&rt->u.dst);
675         if (nrt) {
676                 err = ip6_ins_rt(nrt);
677                 if (!err)
678                         goto out2;
679         }
680
681         if (--attempts <= 0)
682                 goto out2;
683
684         /*
685          * Race condition! In the gap, when table->tb6_lock was
686          * released someone could insert this route.  Relookup.
687          */
688         dst_release(&rt->u.dst);
689         goto relookup;
690
691 out:
692         if (reachable) {
693                 reachable = 0;
694                 goto restart_2;
695         }
696         dst_hold(&rt->u.dst);
697         read_unlock_bh(&table->tb6_lock);
698 out2:
699         rt->u.dst.lastuse = jiffies;
700         rt->u.dst.__use++;
701
702         return rt;
703 }
704
705 void ip6_route_input(struct sk_buff *skb)
706 {
707         struct ipv6hdr *iph = skb->nh.ipv6h;
708         int flags = RT6_LOOKUP_F_HAS_SADDR;
709         struct flowi fl = {
710                 .iif = skb->dev->ifindex,
711                 .nl_u = {
712                         .ip6_u = {
713                                 .daddr = iph->daddr,
714                                 .saddr = iph->saddr,
715 #ifdef CONFIG_IPV6_ROUTE_FWMARK
716                                 .fwmark = skb->nfmark,
717 #endif
718                                 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
719                         },
720                 },
721                 .proto = iph->nexthdr,
722         };
723
724         if (rt6_need_strict(&iph->daddr))
725                 flags |= RT6_LOOKUP_F_IFACE;
726
727         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
728 }
729
730 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
731                                              struct flowi *fl, int flags)
732 {
733         struct fib6_node *fn;
734         struct rt6_info *rt, *nrt;
735         int strict = 0;
736         int attempts = 3;
737         int err;
738         int reachable = RT6_LOOKUP_F_REACHABLE;
739
740         strict |= flags & RT6_LOOKUP_F_IFACE;
741
742 relookup:
743         read_lock_bh(&table->tb6_lock);
744
745 restart_2:
746         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
747
748 restart:
749         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
750         BACKTRACK(&fl->fl6_src);
751         if (rt == &ip6_null_entry ||
752             rt->rt6i_flags & RTF_CACHE)
753                 goto out;
754
755         dst_hold(&rt->u.dst);
756         read_unlock_bh(&table->tb6_lock);
757
758         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
759                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
760         else {
761 #if CLONE_OFFLINK_ROUTE
762                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
763 #else
764                 goto out2;
765 #endif
766         }
767
768         dst_release(&rt->u.dst);
769         rt = nrt ? : &ip6_null_entry;
770
771         dst_hold(&rt->u.dst);
772         if (nrt) {
773                 err = ip6_ins_rt(nrt);
774                 if (!err)
775                         goto out2;
776         }
777
778         if (--attempts <= 0)
779                 goto out2;
780
781         /*
782          * Race condition! In the gap, when table->tb6_lock was
783          * released someone could insert this route.  Relookup.
784          */
785         dst_release(&rt->u.dst);
786         goto relookup;
787
788 out:
789         if (reachable) {
790                 reachable = 0;
791                 goto restart_2;
792         }
793         dst_hold(&rt->u.dst);
794         read_unlock_bh(&table->tb6_lock);
795 out2:
796         rt->u.dst.lastuse = jiffies;
797         rt->u.dst.__use++;
798         return rt;
799 }
800
801 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
802 {
803         int flags = 0;
804
805         if (rt6_need_strict(&fl->fl6_dst))
806                 flags |= RT6_LOOKUP_F_IFACE;
807
808         if (!ipv6_addr_any(&fl->fl6_src))
809                 flags |= RT6_LOOKUP_F_HAS_SADDR;
810
811         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
812 }
813
814
815 /*
816  *      Destination cache support functions
817  */
818
819 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
820 {
821         struct rt6_info *rt;
822
823         rt = (struct rt6_info *) dst;
824
825         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
826                 return dst;
827
828         return NULL;
829 }
830
831 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
832 {
833         struct rt6_info *rt = (struct rt6_info *) dst;
834
835         if (rt) {
836                 if (rt->rt6i_flags & RTF_CACHE)
837                         ip6_del_rt(rt);
838                 else
839                         dst_release(dst);
840         }
841         return NULL;
842 }
843
844 static void ip6_link_failure(struct sk_buff *skb)
845 {
846         struct rt6_info *rt;
847
848         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
849
850         rt = (struct rt6_info *) skb->dst;
851         if (rt) {
852                 if (rt->rt6i_flags&RTF_CACHE) {
853                         dst_set_expires(&rt->u.dst, 0);
854                         rt->rt6i_flags |= RTF_EXPIRES;
855                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
856                         rt->rt6i_node->fn_sernum = -1;
857         }
858 }
859
860 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
861 {
862         struct rt6_info *rt6 = (struct rt6_info*)dst;
863
864         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
865                 rt6->rt6i_flags |= RTF_MODIFIED;
866                 if (mtu < IPV6_MIN_MTU) {
867                         mtu = IPV6_MIN_MTU;
868                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
869                 }
870                 dst->metrics[RTAX_MTU-1] = mtu;
871                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
872         }
873 }
874
875 static int ipv6_get_mtu(struct net_device *dev);
876
877 static inline unsigned int ipv6_advmss(unsigned int mtu)
878 {
879         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
880
881         if (mtu < ip6_rt_min_advmss)
882                 mtu = ip6_rt_min_advmss;
883
884         /*
885          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
886          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
887          * IPV6_MAXPLEN is also valid and means: "any MSS, 
888          * rely only on pmtu discovery"
889          */
890         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
891                 mtu = IPV6_MAXPLEN;
892         return mtu;
893 }
894
895 static struct dst_entry *ndisc_dst_gc_list;
896 static DEFINE_SPINLOCK(ndisc_lock);
897
898 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
899                                   struct neighbour *neigh,
900                                   struct in6_addr *addr,
901                                   int (*output)(struct sk_buff *))
902 {
903         struct rt6_info *rt;
904         struct inet6_dev *idev = in6_dev_get(dev);
905
906         if (unlikely(idev == NULL))
907                 return NULL;
908
909         rt = ip6_dst_alloc();
910         if (unlikely(rt == NULL)) {
911                 in6_dev_put(idev);
912                 goto out;
913         }
914
915         dev_hold(dev);
916         if (neigh)
917                 neigh_hold(neigh);
918         else
919                 neigh = ndisc_get_neigh(dev, addr);
920
921         rt->rt6i_dev      = dev;
922         rt->rt6i_idev     = idev;
923         rt->rt6i_nexthop  = neigh;
924         atomic_set(&rt->u.dst.__refcnt, 1);
925         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
926         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
927         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
928         rt->u.dst.output  = output;
929
930 #if 0   /* there's no chance to use these for ndisc */
931         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
932                                 ? DST_HOST 
933                                 : 0;
934         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
935         rt->rt6i_dst.plen = 128;
936 #endif
937
938         spin_lock_bh(&ndisc_lock);
939         rt->u.dst.next = ndisc_dst_gc_list;
940         ndisc_dst_gc_list = &rt->u.dst;
941         spin_unlock_bh(&ndisc_lock);
942
943         fib6_force_start_gc();
944
945 out:
946         return (struct dst_entry *)rt;
947 }
948
949 int ndisc_dst_gc(int *more)
950 {
951         struct dst_entry *dst, *next, **pprev;
952         int freed;
953
954         next = NULL;
955         freed = 0;
956
957         spin_lock_bh(&ndisc_lock);
958         pprev = &ndisc_dst_gc_list;
959
960         while ((dst = *pprev) != NULL) {
961                 if (!atomic_read(&dst->__refcnt)) {
962                         *pprev = dst->next;
963                         dst_free(dst);
964                         freed++;
965                 } else {
966                         pprev = &dst->next;
967                         (*more)++;
968                 }
969         }
970
971         spin_unlock_bh(&ndisc_lock);
972
973         return freed;
974 }
975
976 static int ip6_dst_gc(void)
977 {
978         static unsigned expire = 30*HZ;
979         static unsigned long last_gc;
980         unsigned long now = jiffies;
981
982         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
983             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
984                 goto out;
985
986         expire++;
987         fib6_run_gc(expire);
988         last_gc = now;
989         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
990                 expire = ip6_rt_gc_timeout>>1;
991
992 out:
993         expire -= expire>>ip6_rt_gc_elasticity;
994         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
995 }
996
997 /* Clean host part of a prefix. Not necessary in radix tree,
998    but results in cleaner routing tables.
999
1000    Remove it only when all the things will work!
1001  */
1002
1003 static int ipv6_get_mtu(struct net_device *dev)
1004 {
1005         int mtu = IPV6_MIN_MTU;
1006         struct inet6_dev *idev;
1007
1008         idev = in6_dev_get(dev);
1009         if (idev) {
1010                 mtu = idev->cnf.mtu6;
1011                 in6_dev_put(idev);
1012         }
1013         return mtu;
1014 }
1015
1016 int ipv6_get_hoplimit(struct net_device *dev)
1017 {
1018         int hoplimit = ipv6_devconf.hop_limit;
1019         struct inet6_dev *idev;
1020
1021         idev = in6_dev_get(dev);
1022         if (idev) {
1023                 hoplimit = idev->cnf.hop_limit;
1024                 in6_dev_put(idev);
1025         }
1026         return hoplimit;
1027 }
1028
1029 /*
1030  *
1031  */
1032
1033 int ip6_route_add(struct fib6_config *cfg)
1034 {
1035         int err;
1036         struct rt6_info *rt = NULL;
1037         struct net_device *dev = NULL;
1038         struct inet6_dev *idev = NULL;
1039         struct fib6_table *table;
1040         int addr_type;
1041
1042         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1043                 return -EINVAL;
1044 #ifndef CONFIG_IPV6_SUBTREES
1045         if (cfg->fc_src_len)
1046                 return -EINVAL;
1047 #endif
1048         if (cfg->fc_ifindex) {
1049                 err = -ENODEV;
1050                 dev = dev_get_by_index(cfg->fc_ifindex);
1051                 if (!dev)
1052                         goto out;
1053                 idev = in6_dev_get(dev);
1054                 if (!idev)
1055                         goto out;
1056         }
1057
1058         if (cfg->fc_metric == 0)
1059                 cfg->fc_metric = IP6_RT_PRIO_USER;
1060
1061         table = fib6_new_table(cfg->fc_table);
1062         if (table == NULL) {
1063                 err = -ENOBUFS;
1064                 goto out;
1065         }
1066
1067         rt = ip6_dst_alloc();
1068
1069         if (rt == NULL) {
1070                 err = -ENOMEM;
1071                 goto out;
1072         }
1073
1074         rt->u.dst.obsolete = -1;
1075         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1076
1077         if (cfg->fc_protocol == RTPROT_UNSPEC)
1078                 cfg->fc_protocol = RTPROT_BOOT;
1079         rt->rt6i_protocol = cfg->fc_protocol;
1080
1081         addr_type = ipv6_addr_type(&cfg->fc_dst);
1082
1083         if (addr_type & IPV6_ADDR_MULTICAST)
1084                 rt->u.dst.input = ip6_mc_input;
1085         else
1086                 rt->u.dst.input = ip6_forward;
1087
1088         rt->u.dst.output = ip6_output;
1089
1090         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1091         rt->rt6i_dst.plen = cfg->fc_dst_len;
1092         if (rt->rt6i_dst.plen == 128)
1093                rt->u.dst.flags = DST_HOST;
1094
1095 #ifdef CONFIG_IPV6_SUBTREES
1096         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1097         rt->rt6i_src.plen = cfg->fc_src_len;
1098 #endif
1099
1100         rt->rt6i_metric = cfg->fc_metric;
1101
1102         /* We cannot add true routes via loopback here,
1103            they would result in kernel looping; promote them to reject routes
1104          */
1105         if ((cfg->fc_flags & RTF_REJECT) ||
1106             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1107                 /* hold loopback dev/idev if we haven't done so. */
1108                 if (dev != &loopback_dev) {
1109                         if (dev) {
1110                                 dev_put(dev);
1111                                 in6_dev_put(idev);
1112                         }
1113                         dev = &loopback_dev;
1114                         dev_hold(dev);
1115                         idev = in6_dev_get(dev);
1116                         if (!idev) {
1117                                 err = -ENODEV;
1118                                 goto out;
1119                         }
1120                 }
1121                 rt->u.dst.output = ip6_pkt_discard_out;
1122                 rt->u.dst.input = ip6_pkt_discard;
1123                 rt->u.dst.error = -ENETUNREACH;
1124                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1125                 goto install_route;
1126         }
1127
1128         if (cfg->fc_flags & RTF_GATEWAY) {
1129                 struct in6_addr *gw_addr;
1130                 int gwa_type;
1131
1132                 gw_addr = &cfg->fc_gateway;
1133                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1134                 gwa_type = ipv6_addr_type(gw_addr);
1135
1136                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1137                         struct rt6_info *grt;
1138
1139                         /* IPv6 strictly inhibits using not link-local
1140                            addresses as nexthop address.
1141                            Otherwise, router will not able to send redirects.
1142                            It is very good, but in some (rare!) circumstances
1143                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1144                            some exceptions. --ANK
1145                          */
1146                         err = -EINVAL;
1147                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1148                                 goto out;
1149
1150                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1151
1152                         err = -EHOSTUNREACH;
1153                         if (grt == NULL)
1154                                 goto out;
1155                         if (dev) {
1156                                 if (dev != grt->rt6i_dev) {
1157                                         dst_release(&grt->u.dst);
1158                                         goto out;
1159                                 }
1160                         } else {
1161                                 dev = grt->rt6i_dev;
1162                                 idev = grt->rt6i_idev;
1163                                 dev_hold(dev);
1164                                 in6_dev_hold(grt->rt6i_idev);
1165                         }
1166                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1167                                 err = 0;
1168                         dst_release(&grt->u.dst);
1169
1170                         if (err)
1171                                 goto out;
1172                 }
1173                 err = -EINVAL;
1174                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1175                         goto out;
1176         }
1177
1178         err = -ENODEV;
1179         if (dev == NULL)
1180                 goto out;
1181
1182         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1183                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1184                 if (IS_ERR(rt->rt6i_nexthop)) {
1185                         err = PTR_ERR(rt->rt6i_nexthop);
1186                         rt->rt6i_nexthop = NULL;
1187                         goto out;
1188                 }
1189         }
1190
1191         rt->rt6i_flags = cfg->fc_flags;
1192
1193 install_route:
1194         if (cfg->fc_mx) {
1195                 struct nlattr *nla;
1196                 int remaining;
1197
1198                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1199                         int type = nla->nla_type;
1200
1201                         if (type) {
1202                                 if (type > RTAX_MAX) {
1203                                         err = -EINVAL;
1204                                         goto out;
1205                                 }
1206
1207                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1208                         }
1209                 }
1210         }
1211
1212         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1213                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1214         if (!rt->u.dst.metrics[RTAX_MTU-1])
1215                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1216         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1217                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1218         rt->u.dst.dev = dev;
1219         rt->rt6i_idev = idev;
1220         rt->rt6i_table = table;
1221         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1222
1223 out:
1224         if (dev)
1225                 dev_put(dev);
1226         if (idev)
1227                 in6_dev_put(idev);
1228         if (rt)
1229                 dst_free((struct dst_entry *) rt);
1230         return err;
1231 }
1232
1233 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1234 {
1235         int err;
1236         struct fib6_table *table;
1237
1238         if (rt == &ip6_null_entry)
1239                 return -ENOENT;
1240
1241         table = rt->rt6i_table;
1242         write_lock_bh(&table->tb6_lock);
1243
1244         err = fib6_del(rt, info);
1245         dst_release(&rt->u.dst);
1246
1247         write_unlock_bh(&table->tb6_lock);
1248
1249         return err;
1250 }
1251
1252 int ip6_del_rt(struct rt6_info *rt)
1253 {
1254         return __ip6_del_rt(rt, NULL);
1255 }
1256
1257 static int ip6_route_del(struct fib6_config *cfg)
1258 {
1259         struct fib6_table *table;
1260         struct fib6_node *fn;
1261         struct rt6_info *rt;
1262         int err = -ESRCH;
1263
1264         table = fib6_get_table(cfg->fc_table);
1265         if (table == NULL)
1266                 return err;
1267
1268         read_lock_bh(&table->tb6_lock);
1269
1270         fn = fib6_locate(&table->tb6_root,
1271                          &cfg->fc_dst, cfg->fc_dst_len,
1272                          &cfg->fc_src, cfg->fc_src_len);
1273         
1274         if (fn) {
1275                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1276                         if (cfg->fc_ifindex &&
1277                             (rt->rt6i_dev == NULL ||
1278                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1279                                 continue;
1280                         if (cfg->fc_flags & RTF_GATEWAY &&
1281                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1282                                 continue;
1283                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1284                                 continue;
1285                         dst_hold(&rt->u.dst);
1286                         read_unlock_bh(&table->tb6_lock);
1287
1288                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1289                 }
1290         }
1291         read_unlock_bh(&table->tb6_lock);
1292
1293         return err;
1294 }
1295
1296 /*
1297  *      Handle redirects
1298  */
1299 struct ip6rd_flowi {
1300         struct flowi fl;
1301         struct in6_addr gateway;
1302 };
1303
1304 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1305                                              struct flowi *fl,
1306                                              int flags)
1307 {
1308         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1309         struct rt6_info *rt;
1310         struct fib6_node *fn;
1311
1312         /*
1313          * Get the "current" route for this destination and
1314          * check if the redirect has come from approriate router.
1315          *
1316          * RFC 2461 specifies that redirects should only be
1317          * accepted if they come from the nexthop to the target.
1318          * Due to the way the routes are chosen, this notion
1319          * is a bit fuzzy and one might need to check all possible
1320          * routes.
1321          */
1322
1323         read_lock_bh(&table->tb6_lock);
1324         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1325 restart:
1326         for (rt = fn->leaf; rt; rt = rt->u.next) {
1327                 /*
1328                  * Current route is on-link; redirect is always invalid.
1329                  *
1330                  * Seems, previous statement is not true. It could
1331                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1332                  * But then router serving it might decide, that we should
1333                  * know truth 8)8) --ANK (980726).
1334                  */
1335                 if (rt6_check_expired(rt))
1336                         continue;
1337                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1338                         continue;
1339                 if (fl->oif != rt->rt6i_dev->ifindex)
1340                         continue;
1341                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1342                         continue;
1343                 break;
1344         }
1345
1346         if (!rt)
1347                 rt = &ip6_null_entry;
1348         BACKTRACK(&fl->fl6_src);
1349 out:
1350         dst_hold(&rt->u.dst);
1351
1352         read_unlock_bh(&table->tb6_lock);
1353
1354         return rt;
1355 };
1356
1357 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1358                                            struct in6_addr *src,
1359                                            struct in6_addr *gateway,
1360                                            struct net_device *dev)
1361 {
1362         int flags = RT6_LOOKUP_F_HAS_SADDR;
1363         struct ip6rd_flowi rdfl = {
1364                 .fl = {
1365                         .oif = dev->ifindex,
1366                         .nl_u = {
1367                                 .ip6_u = {
1368                                         .daddr = *dest,
1369                                         .saddr = *src,
1370                                 },
1371                         },
1372                 },
1373                 .gateway = *gateway,
1374         };
1375
1376         if (rt6_need_strict(dest))
1377                 flags |= RT6_LOOKUP_F_IFACE;
1378
1379         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1380 }
1381
1382 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1383                   struct in6_addr *saddr,
1384                   struct neighbour *neigh, u8 *lladdr, int on_link)
1385 {
1386         struct rt6_info *rt, *nrt = NULL;
1387         struct netevent_redirect netevent;
1388
1389         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1390
1391         if (rt == &ip6_null_entry) {
1392                 if (net_ratelimit())
1393                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1394                                "for redirect target\n");
1395                 goto out;
1396         }
1397
1398         /*
1399          *      We have finally decided to accept it.
1400          */
1401
1402         neigh_update(neigh, lladdr, NUD_STALE, 
1403                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1404                      NEIGH_UPDATE_F_OVERRIDE|
1405                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1406                                      NEIGH_UPDATE_F_ISROUTER))
1407                      );
1408
1409         /*
1410          * Redirect received -> path was valid.
1411          * Look, redirects are sent only in response to data packets,
1412          * so that this nexthop apparently is reachable. --ANK
1413          */
1414         dst_confirm(&rt->u.dst);
1415
1416         /* Duplicate redirect: silently ignore. */
1417         if (neigh == rt->u.dst.neighbour)
1418                 goto out;
1419
1420         nrt = ip6_rt_copy(rt);
1421         if (nrt == NULL)
1422                 goto out;
1423
1424         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1425         if (on_link)
1426                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1427
1428         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1429         nrt->rt6i_dst.plen = 128;
1430         nrt->u.dst.flags |= DST_HOST;
1431
1432         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1433         nrt->rt6i_nexthop = neigh_clone(neigh);
1434         /* Reset pmtu, it may be better */
1435         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1436         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1437
1438         if (ip6_ins_rt(nrt))
1439                 goto out;
1440
1441         netevent.old = &rt->u.dst;
1442         netevent.new = &nrt->u.dst;
1443         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1444
1445         if (rt->rt6i_flags&RTF_CACHE) {
1446                 ip6_del_rt(rt);
1447                 return;
1448         }
1449
1450 out:
1451         dst_release(&rt->u.dst);
1452         return;
1453 }
1454
1455 /*
1456  *      Handle ICMP "packet too big" messages
1457  *      i.e. Path MTU discovery
1458  */
1459
1460 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1461                         struct net_device *dev, u32 pmtu)
1462 {
1463         struct rt6_info *rt, *nrt;
1464         int allfrag = 0;
1465
1466         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1467         if (rt == NULL)
1468                 return;
1469
1470         if (pmtu >= dst_mtu(&rt->u.dst))
1471                 goto out;
1472
1473         if (pmtu < IPV6_MIN_MTU) {
1474                 /*
1475                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1476                  * MTU (1280) and a fragment header should always be included
1477                  * after a node receiving Too Big message reporting PMTU is
1478                  * less than the IPv6 Minimum Link MTU.
1479                  */
1480                 pmtu = IPV6_MIN_MTU;
1481                 allfrag = 1;
1482         }
1483
1484         /* New mtu received -> path was valid.
1485            They are sent only in response to data packets,
1486            so that this nexthop apparently is reachable. --ANK
1487          */
1488         dst_confirm(&rt->u.dst);
1489
1490         /* Host route. If it is static, it would be better
1491            not to override it, but add new one, so that
1492            when cache entry will expire old pmtu
1493            would return automatically.
1494          */
1495         if (rt->rt6i_flags & RTF_CACHE) {
1496                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1497                 if (allfrag)
1498                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1499                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1500                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1501                 goto out;
1502         }
1503
1504         /* Network route.
1505            Two cases are possible:
1506            1. It is connected route. Action: COW
1507            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1508          */
1509         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1510                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1511         else
1512                 nrt = rt6_alloc_clone(rt, daddr);
1513
1514         if (nrt) {
1515                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1516                 if (allfrag)
1517                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1518
1519                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1520                  * happened within 5 mins, the recommended timer is 10 mins.
1521                  * Here this route expiration time is set to ip6_rt_mtu_expires
1522                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1523                  * and detecting PMTU increase will be automatically happened.
1524                  */
1525                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1526                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1527
1528                 ip6_ins_rt(nrt);
1529         }
1530 out:
1531         dst_release(&rt->u.dst);
1532 }
1533
1534 /*
1535  *      Misc support functions
1536  */
1537
1538 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1539 {
1540         struct rt6_info *rt = ip6_dst_alloc();
1541
1542         if (rt) {
1543                 rt->u.dst.input = ort->u.dst.input;
1544                 rt->u.dst.output = ort->u.dst.output;
1545
1546                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1547                 rt->u.dst.error = ort->u.dst.error;
1548                 rt->u.dst.dev = ort->u.dst.dev;
1549                 if (rt->u.dst.dev)
1550                         dev_hold(rt->u.dst.dev);
1551                 rt->rt6i_idev = ort->rt6i_idev;
1552                 if (rt->rt6i_idev)
1553                         in6_dev_hold(rt->rt6i_idev);
1554                 rt->u.dst.lastuse = jiffies;
1555                 rt->rt6i_expires = 0;
1556
1557                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1558                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1559                 rt->rt6i_metric = 0;
1560
1561                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1562 #ifdef CONFIG_IPV6_SUBTREES
1563                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1564 #endif
1565                 rt->rt6i_table = ort->rt6i_table;
1566         }
1567         return rt;
1568 }
1569
1570 #ifdef CONFIG_IPV6_ROUTE_INFO
1571 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1572                                            struct in6_addr *gwaddr, int ifindex)
1573 {
1574         struct fib6_node *fn;
1575         struct rt6_info *rt = NULL;
1576         struct fib6_table *table;
1577
1578         table = fib6_get_table(RT6_TABLE_INFO);
1579         if (table == NULL)
1580                 return NULL;
1581
1582         write_lock_bh(&table->tb6_lock);
1583         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1584         if (!fn)
1585                 goto out;
1586
1587         for (rt = fn->leaf; rt; rt = rt->u.next) {
1588                 if (rt->rt6i_dev->ifindex != ifindex)
1589                         continue;
1590                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1591                         continue;
1592                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1593                         continue;
1594                 dst_hold(&rt->u.dst);
1595                 break;
1596         }
1597 out:
1598         write_unlock_bh(&table->tb6_lock);
1599         return rt;
1600 }
1601
1602 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1603                                            struct in6_addr *gwaddr, int ifindex,
1604                                            unsigned pref)
1605 {
1606         struct fib6_config cfg = {
1607                 .fc_table       = RT6_TABLE_INFO,
1608                 .fc_metric      = 1024,
1609                 .fc_ifindex     = ifindex,
1610                 .fc_dst_len     = prefixlen,
1611                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1612                                   RTF_UP | RTF_PREF(pref),
1613         };
1614
1615         ipv6_addr_copy(&cfg.fc_dst, prefix);
1616         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1617
1618         /* We should treat it as a default route if prefix length is 0. */
1619         if (!prefixlen)
1620                 cfg.fc_flags |= RTF_DEFAULT;
1621
1622         ip6_route_add(&cfg);
1623
1624         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1625 }
1626 #endif
1627
1628 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1629 {       
1630         struct rt6_info *rt;
1631         struct fib6_table *table;
1632
1633         table = fib6_get_table(RT6_TABLE_DFLT);
1634         if (table == NULL)
1635                 return NULL;
1636
1637         write_lock_bh(&table->tb6_lock);
1638         for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1639                 if (dev == rt->rt6i_dev &&
1640                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1641                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1642                         break;
1643         }
1644         if (rt)
1645                 dst_hold(&rt->u.dst);
1646         write_unlock_bh(&table->tb6_lock);
1647         return rt;
1648 }
1649
1650 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1651                                      struct net_device *dev,
1652                                      unsigned int pref)
1653 {
1654         struct fib6_config cfg = {
1655                 .fc_table       = RT6_TABLE_DFLT,
1656                 .fc_metric      = 1024,
1657                 .fc_ifindex     = dev->ifindex,
1658                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1659                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1660         };
1661
1662         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1663
1664         ip6_route_add(&cfg);
1665
1666         return rt6_get_dflt_router(gwaddr, dev);
1667 }
1668
1669 void rt6_purge_dflt_routers(void)
1670 {
1671         struct rt6_info *rt;
1672         struct fib6_table *table;
1673
1674         /* NOTE: Keep consistent with rt6_get_dflt_router */
1675         table = fib6_get_table(RT6_TABLE_DFLT);
1676         if (table == NULL)
1677                 return;
1678
1679 restart:
1680         read_lock_bh(&table->tb6_lock);
1681         for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1682                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1683                         dst_hold(&rt->u.dst);
1684                         read_unlock_bh(&table->tb6_lock);
1685                         ip6_del_rt(rt);
1686                         goto restart;
1687                 }
1688         }
1689         read_unlock_bh(&table->tb6_lock);
1690 }
1691
1692 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1693                                  struct fib6_config *cfg)
1694 {
1695         memset(cfg, 0, sizeof(*cfg));
1696
1697         cfg->fc_table = RT6_TABLE_MAIN;
1698         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1699         cfg->fc_metric = rtmsg->rtmsg_metric;
1700         cfg->fc_expires = rtmsg->rtmsg_info;
1701         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1702         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1703         cfg->fc_flags = rtmsg->rtmsg_flags;
1704
1705         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1706         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1707         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1708 }
1709
1710 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1711 {
1712         struct fib6_config cfg;
1713         struct in6_rtmsg rtmsg;
1714         int err;
1715
1716         switch(cmd) {
1717         case SIOCADDRT:         /* Add a route */
1718         case SIOCDELRT:         /* Delete a route */
1719                 if (!capable(CAP_NET_ADMIN))
1720                         return -EPERM;
1721                 err = copy_from_user(&rtmsg, arg,
1722                                      sizeof(struct in6_rtmsg));
1723                 if (err)
1724                         return -EFAULT;
1725
1726                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1727
1728                 rtnl_lock();
1729                 switch (cmd) {
1730                 case SIOCADDRT:
1731                         err = ip6_route_add(&cfg);
1732                         break;
1733                 case SIOCDELRT:
1734                         err = ip6_route_del(&cfg);
1735                         break;
1736                 default:
1737                         err = -EINVAL;
1738                 }
1739                 rtnl_unlock();
1740
1741                 return err;
1742         };
1743
1744         return -EINVAL;
1745 }
1746
1747 /*
1748  *      Drop the packet on the floor
1749  */
1750
1751 static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
1752 {
1753         int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1754         if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1755                 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1756
1757         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1758         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1759         kfree_skb(skb);
1760         return 0;
1761 }
1762
1763 static int ip6_pkt_discard(struct sk_buff *skb)
1764 {
1765         return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1766 }
1767
1768 static int ip6_pkt_discard_out(struct sk_buff *skb)
1769 {
1770         skb->dev = skb->dst->dev;
1771         return ip6_pkt_discard(skb);
1772 }
1773
1774 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1775
1776 static int ip6_pkt_prohibit(struct sk_buff *skb)
1777 {
1778         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1779 }
1780
1781 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1782 {
1783         skb->dev = skb->dst->dev;
1784         return ip6_pkt_prohibit(skb);
1785 }
1786
/*
 * Black-hole handler: free @skb silently; no ICMP error is sent and no
 * statistics are touched here.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	const int handled = 0;

	kfree_skb(skb);
	return handled;
}
1792
1793 #endif
1794
1795 /*
1796  *      Allocate a dst for local (unicast / anycast) address.
1797  */
1798
1799 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1800                                     const struct in6_addr *addr,
1801                                     int anycast)
1802 {
1803         struct rt6_info *rt = ip6_dst_alloc();
1804
1805         if (rt == NULL)
1806                 return ERR_PTR(-ENOMEM);
1807
1808         dev_hold(&loopback_dev);
1809         in6_dev_hold(idev);
1810
1811         rt->u.dst.flags = DST_HOST;
1812         rt->u.dst.input = ip6_input;
1813         rt->u.dst.output = ip6_output;
1814         rt->rt6i_dev = &loopback_dev;
1815         rt->rt6i_idev = idev;
1816         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1817         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1818         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1819         rt->u.dst.obsolete = -1;
1820
1821         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1822         if (anycast)
1823                 rt->rt6i_flags |= RTF_ANYCAST;
1824         else
1825                 rt->rt6i_flags |= RTF_LOCAL;
1826         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1827         if (rt->rt6i_nexthop == NULL) {
1828                 dst_free((struct dst_entry *) rt);
1829                 return ERR_PTR(-ENOMEM);
1830         }
1831
1832         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1833         rt->rt6i_dst.plen = 128;
1834         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1835
1836         atomic_set(&rt->u.dst.__refcnt, 1);
1837
1838         return rt;
1839 }
1840
1841 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1842 {
1843         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1844             rt != &ip6_null_entry) {
1845                 RT6_TRACE("deleted by ifdown %p\n", rt);
1846                 return -1;
1847         }
1848         return 0;
1849 }
1850
1851 void rt6_ifdown(struct net_device *dev)
1852 {
1853         fib6_clean_all(fib6_ifdown, 0, dev);
1854 }
1855
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU of that device */
};
1861
1862 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1863 {
1864         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1865         struct inet6_dev *idev;
1866
1867         /* In IPv6 pmtu discovery is not optional,
1868            so that RTAX_MTU lock cannot disable it.
1869            We still use this lock to block changes
1870            caused by addrconf/ndisc.
1871         */
1872
1873         idev = __in6_dev_get(arg->dev);
1874         if (idev == NULL)
1875                 return 0;
1876
1877         /* For administrative MTU increase, there is no way to discover
1878            IPv6 PMTU increase, so PMTU increase should be updated here.
1879            Since RFC 1981 doesn't include administrative MTU increase
1880            update PMTU increase is a MUST. (i.e. jumbo frame)
1881          */
1882         /*
1883            If new MTU is less than route PMTU, this new MTU will be the
1884            lowest MTU in the path, update the route PMTU to reflect PMTU
1885            decreases; if new MTU is greater than route PMTU, and the
1886            old MTU is the lowest MTU in the path, update the route PMTU
1887            to reflect the increase. In this case if the other nodes' MTU
1888            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1889            PMTU discouvery.
1890          */
1891         if (rt->rt6i_dev == arg->dev &&
1892             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1893             (dst_mtu(&rt->u.dst) > arg->mtu ||
1894              (dst_mtu(&rt->u.dst) < arg->mtu &&
1895               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1896                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1897         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1898         return 0;
1899 }
1900
1901 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1902 {
1903         struct rt6_mtu_change_arg arg = {
1904                 .dev = dev,
1905                 .mtu = mtu,
1906         };
1907
1908         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1909 }
1910
1911 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1912         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1913         [RTA_OIF]               = { .type = NLA_U32 },
1914         [RTA_IIF]               = { .type = NLA_U32 },
1915         [RTA_PRIORITY]          = { .type = NLA_U32 },
1916         [RTA_METRICS]           = { .type = NLA_NESTED },
1917 };
1918
1919 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1920                               struct fib6_config *cfg)
1921 {
1922         struct rtmsg *rtm;
1923         struct nlattr *tb[RTA_MAX+1];
1924         int err;
1925
1926         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1927         if (err < 0)
1928                 goto errout;
1929
1930         err = -EINVAL;
1931         rtm = nlmsg_data(nlh);
1932         memset(cfg, 0, sizeof(*cfg));
1933
1934         cfg->fc_table = rtm->rtm_table;
1935         cfg->fc_dst_len = rtm->rtm_dst_len;
1936         cfg->fc_src_len = rtm->rtm_src_len;
1937         cfg->fc_flags = RTF_UP;
1938         cfg->fc_protocol = rtm->rtm_protocol;
1939
1940         if (rtm->rtm_type == RTN_UNREACHABLE)
1941                 cfg->fc_flags |= RTF_REJECT;
1942
1943         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1944         cfg->fc_nlinfo.nlh = nlh;
1945
1946         if (tb[RTA_GATEWAY]) {
1947                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1948                 cfg->fc_flags |= RTF_GATEWAY;
1949         }
1950
1951         if (tb[RTA_DST]) {
1952                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1953
1954                 if (nla_len(tb[RTA_DST]) < plen)
1955                         goto errout;
1956
1957                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1958         }
1959
1960         if (tb[RTA_SRC]) {
1961                 int plen = (rtm->rtm_src_len + 7) >> 3;
1962
1963                 if (nla_len(tb[RTA_SRC]) < plen)
1964                         goto errout;
1965
1966                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1967         }
1968
1969         if (tb[RTA_OIF])
1970                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1971
1972         if (tb[RTA_PRIORITY])
1973                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1974
1975         if (tb[RTA_METRICS]) {
1976                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1977                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1978         }
1979
1980         if (tb[RTA_TABLE])
1981                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1982
1983         err = 0;
1984 errout:
1985         return err;
1986 }
1987
1988 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1989 {
1990         struct fib6_config cfg;
1991         int err;
1992
1993         err = rtm_to_fib6_config(skb, nlh, &cfg);
1994         if (err < 0)
1995                 return err;
1996
1997         return ip6_route_del(&cfg);
1998 }
1999
2000 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2001 {
2002         struct fib6_config cfg;
2003         int err;
2004
2005         err = rtm_to_fib6_config(skb, nlh, &cfg);
2006         if (err < 0)
2007                 return err;
2008
2009         return ip6_route_add(&cfg);
2010 }
2011
2012 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2013                          struct in6_addr *dst, struct in6_addr *src,
2014                          int iif, int type, u32 pid, u32 seq,
2015                          int prefix, unsigned int flags)
2016 {
2017         struct rtmsg *rtm;
2018         struct nlmsghdr *nlh;
2019         struct rta_cacheinfo ci;
2020         u32 table;
2021
2022         if (prefix) {   /* user wants prefix routes only */
2023                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2024                         /* success since this is not a prefix route */
2025                         return 1;
2026                 }
2027         }
2028
2029         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2030         if (nlh == NULL)
2031                 return -ENOBUFS;
2032
2033         rtm = nlmsg_data(nlh);
2034         rtm->rtm_family = AF_INET6;
2035         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2036         rtm->rtm_src_len = rt->rt6i_src.plen;
2037         rtm->rtm_tos = 0;
2038         if (rt->rt6i_table)
2039                 table = rt->rt6i_table->tb6_id;
2040         else
2041                 table = RT6_TABLE_UNSPEC;
2042         rtm->rtm_table = table;
2043         NLA_PUT_U32(skb, RTA_TABLE, table);
2044         if (rt->rt6i_flags&RTF_REJECT)
2045                 rtm->rtm_type = RTN_UNREACHABLE;
2046         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2047                 rtm->rtm_type = RTN_LOCAL;
2048         else
2049                 rtm->rtm_type = RTN_UNICAST;
2050         rtm->rtm_flags = 0;
2051         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2052         rtm->rtm_protocol = rt->rt6i_protocol;
2053         if (rt->rt6i_flags&RTF_DYNAMIC)
2054                 rtm->rtm_protocol = RTPROT_REDIRECT;
2055         else if (rt->rt6i_flags & RTF_ADDRCONF)
2056                 rtm->rtm_protocol = RTPROT_KERNEL;
2057         else if (rt->rt6i_flags&RTF_DEFAULT)
2058                 rtm->rtm_protocol = RTPROT_RA;
2059
2060         if (rt->rt6i_flags&RTF_CACHE)
2061                 rtm->rtm_flags |= RTM_F_CLONED;
2062
2063         if (dst) {
2064                 NLA_PUT(skb, RTA_DST, 16, dst);
2065                 rtm->rtm_dst_len = 128;
2066         } else if (rtm->rtm_dst_len)
2067                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2068 #ifdef CONFIG_IPV6_SUBTREES
2069         if (src) {
2070                 NLA_PUT(skb, RTA_SRC, 16, src);
2071                 rtm->rtm_src_len = 128;
2072         } else if (rtm->rtm_src_len)
2073                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2074 #endif
2075         if (iif)
2076                 NLA_PUT_U32(skb, RTA_IIF, iif);
2077         else if (dst) {
2078                 struct in6_addr saddr_buf;
2079                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2080                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2081         }
2082
2083         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2084                 goto nla_put_failure;
2085
2086         if (rt->u.dst.neighbour)
2087                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2088
2089         if (rt->u.dst.dev)
2090                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2091
2092         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2093         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2094         if (rt->rt6i_expires)
2095                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2096         else
2097                 ci.rta_expires = 0;
2098         ci.rta_used = rt->u.dst.__use;
2099         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2100         ci.rta_error = rt->u.dst.error;
2101         ci.rta_id = 0;
2102         ci.rta_ts = 0;
2103         ci.rta_tsage = 0;
2104         NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2105
2106         return nlmsg_end(skb, nlh);
2107
2108 nla_put_failure:
2109         return nlmsg_cancel(skb, nlh);
2110 }
2111
2112 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2113 {
2114         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2115         int prefix;
2116
2117         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2118                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2119                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2120         } else
2121                 prefix = 0;
2122
2123         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2124                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2125                      prefix, NLM_F_MULTI);
2126 }
2127
2128 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2129 {
2130         struct nlattr *tb[RTA_MAX+1];
2131         struct rt6_info *rt;
2132         struct sk_buff *skb;
2133         struct rtmsg *rtm;
2134         struct flowi fl;
2135         int err, iif = 0;
2136
2137         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2138         if (err < 0)
2139                 goto errout;
2140
2141         err = -EINVAL;
2142         memset(&fl, 0, sizeof(fl));
2143
2144         if (tb[RTA_SRC]) {
2145                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2146                         goto errout;
2147
2148                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2149         }
2150
2151         if (tb[RTA_DST]) {
2152                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2153                         goto errout;
2154
2155                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2156         }
2157
2158         if (tb[RTA_IIF])
2159                 iif = nla_get_u32(tb[RTA_IIF]);
2160
2161         if (tb[RTA_OIF])
2162                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2163
2164         if (iif) {
2165                 struct net_device *dev;
2166                 dev = __dev_get_by_index(iif);
2167                 if (!dev) {
2168                         err = -ENODEV;
2169                         goto errout;
2170                 }
2171         }
2172
2173         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2174         if (skb == NULL) {
2175                 err = -ENOBUFS;
2176                 goto errout;
2177         }
2178
2179         /* Reserve room for dummy headers, this skb can pass
2180            through good chunk of routing engine.
2181          */
2182         skb->mac.raw = skb->data;
2183         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2184
2185         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2186         skb->dst = &rt->u.dst;
2187
2188         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2189                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2190                             nlh->nlmsg_seq, 0, 0);
2191         if (err < 0) {
2192                 kfree_skb(skb);
2193                 goto errout;
2194         }
2195
2196         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2197 errout:
2198         return err;
2199 }
2200
2201 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2202 {
2203         struct sk_buff *skb;
2204         u32 pid = 0, seq = 0;
2205         struct nlmsghdr *nlh = NULL;
2206         int payload = sizeof(struct rtmsg) + 256;
2207         int err = -ENOBUFS;
2208
2209         if (info) {
2210                 pid = info->pid;
2211                 nlh = info->nlh;
2212                 if (nlh)
2213                         seq = nlh->nlmsg_seq;
2214         }
2215
2216         skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2217         if (skb == NULL)
2218                 goto errout;
2219
2220         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2221         if (err < 0) {
2222                 kfree_skb(skb);
2223                 goto errout;
2224         }
2225
2226         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2227 errout:
2228         if (err < 0)
2229                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2230 }
2231
2232 /*
2233  *      /proc
2234  */
2235
2236 #ifdef CONFIG_PROC_FS
2237
/*
 * Size in bytes of one line written by rt6_info_route(): three
 * 32-digit hex addresses, two " %02x " prefix lengths, and the
 * trailing " %08x %08x %08x %08x %8s\n" fields.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
        char *buffer;   /* output buffer */
        int offset;     /* offset requested by the reader */
        int length;     /* number of bytes requested by the reader */
        int skip;       /* routes skipped so far to reach offset */
        int len;        /* bytes written into buffer so far */
};
2248
2249 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2250 {
2251         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2252         int i;
2253
2254         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2255                 arg->skip++;
2256                 return 0;
2257         }
2258
2259         if (arg->len >= arg->length)
2260                 return 0;
2261
2262         for (i=0; i<16; i++) {
2263                 sprintf(arg->buffer + arg->len, "%02x",
2264                         rt->rt6i_dst.addr.s6_addr[i]);
2265                 arg->len += 2;
2266         }
2267         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2268                             rt->rt6i_dst.plen);
2269
2270 #ifdef CONFIG_IPV6_SUBTREES
2271         for (i=0; i<16; i++) {
2272                 sprintf(arg->buffer + arg->len, "%02x",
2273                         rt->rt6i_src.addr.s6_addr[i]);
2274                 arg->len += 2;
2275         }
2276         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2277                             rt->rt6i_src.plen);
2278 #else
2279         sprintf(arg->buffer + arg->len,
2280                 "00000000000000000000000000000000 00 ");
2281         arg->len += 36;
2282 #endif
2283
2284         if (rt->rt6i_nexthop) {
2285                 for (i=0; i<16; i++) {
2286                         sprintf(arg->buffer + arg->len, "%02x",
2287                                 rt->rt6i_nexthop->primary_key[i]);
2288                         arg->len += 2;
2289                 }
2290         } else {
2291                 sprintf(arg->buffer + arg->len,
2292                         "00000000000000000000000000000000");
2293                 arg->len += 32;
2294         }
2295         arg->len += sprintf(arg->buffer + arg->len,
2296                             " %08x %08x %08x %08x %8s\n",
2297                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2298                             rt->u.dst.__use, rt->rt6i_flags, 
2299                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2300         return 0;
2301 }
2302
2303 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2304 {
2305         struct rt6_proc_arg arg = {
2306                 .buffer = buffer,
2307                 .offset = offset,
2308                 .length = length,
2309         };
2310
2311         fib6_clean_all(rt6_info_route, 0, &arg);
2312
2313         *start = buffer;
2314         if (offset)
2315                 *start += offset % RT6_INFO_LEN;
2316
2317         arg.len -= offset % RT6_INFO_LEN;
2318
2319         if (arg.len > length)
2320                 arg.len = length;
2321         if (arg.len < 0)
2322                 arg.len = 0;
2323
2324         return arg.len;
2325 }
2326
2327 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2328 {
2329         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2330                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2331                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2332                       rt6_stats.fib_rt_cache,
2333                       atomic_read(&ip6_dst_ops.entries),
2334                       rt6_stats.fib_discarded_routes);
2335
2336         return 0;
2337 }
2338
2339 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2340 {
2341         return single_open(file, rt6_stats_seq_show, NULL);
2342 }
2343
2344 static struct file_operations rt6_stats_seq_fops = {
2345         .owner   = THIS_MODULE,
2346         .open    = rt6_stats_seq_open,
2347         .read    = seq_read,
2348         .llseek  = seq_lseek,
2349         .release = single_release,
2350 };
2351 #endif  /* CONFIG_PROC_FS */
2352
2353 #ifdef CONFIG_SYSCTL
2354
2355 static int flush_delay;
2356
2357 static
2358 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2359                               void __user *buffer, size_t *lenp, loff_t *ppos)
2360 {
2361         if (write) {
2362                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2363                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2364                 return 0;
2365         } else
2366                 return -EINVAL;
2367 }
2368
2369 ctl_table ipv6_route_table[] = {
2370         {
2371                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2372                 .procname       =       "flush",
2373                 .data           =       &flush_delay,
2374                 .maxlen         =       sizeof(int),
2375                 .mode           =       0200,
2376                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2377         },
2378         {
2379                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2380                 .procname       =       "gc_thresh",
2381                 .data           =       &ip6_dst_ops.gc_thresh,
2382                 .maxlen         =       sizeof(int),
2383                 .mode           =       0644,
2384                 .proc_handler   =       &proc_dointvec,
2385         },
2386         {
2387                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2388                 .procname       =       "max_size",
2389                 .data           =       &ip6_rt_max_size,
2390                 .maxlen         =       sizeof(int),
2391                 .mode           =       0644,
2392                 .proc_handler   =       &proc_dointvec,
2393         },
2394         {
2395                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2396                 .procname       =       "gc_min_interval",
2397                 .data           =       &ip6_rt_gc_min_interval,
2398                 .maxlen         =       sizeof(int),
2399                 .mode           =       0644,
2400                 .proc_handler   =       &proc_dointvec_jiffies,
2401                 .strategy       =       &sysctl_jiffies,
2402         },
2403         {
2404                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2405                 .procname       =       "gc_timeout",
2406                 .data           =       &ip6_rt_gc_timeout,
2407                 .maxlen         =       sizeof(int),
2408                 .mode           =       0644,
2409                 .proc_handler   =       &proc_dointvec_jiffies,
2410                 .strategy       =       &sysctl_jiffies,
2411         },
2412         {
2413                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2414                 .procname       =       "gc_interval",
2415                 .data           =       &ip6_rt_gc_interval,
2416                 .maxlen         =       sizeof(int),
2417                 .mode           =       0644,
2418                 .proc_handler   =       &proc_dointvec_jiffies,
2419                 .strategy       =       &sysctl_jiffies,
2420         },
2421         {
2422                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2423                 .procname       =       "gc_elasticity",
2424                 .data           =       &ip6_rt_gc_elasticity,
2425                 .maxlen         =       sizeof(int),
2426                 .mode           =       0644,
2427                 .proc_handler   =       &proc_dointvec_jiffies,
2428                 .strategy       =       &sysctl_jiffies,
2429         },
2430         {
2431                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2432                 .procname       =       "mtu_expires",
2433                 .data           =       &ip6_rt_mtu_expires,
2434                 .maxlen         =       sizeof(int),
2435                 .mode           =       0644,
2436                 .proc_handler   =       &proc_dointvec_jiffies,
2437                 .strategy       =       &sysctl_jiffies,
2438         },
2439         {
2440                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2441                 .procname       =       "min_adv_mss",
2442                 .data           =       &ip6_rt_min_advmss,
2443                 .maxlen         =       sizeof(int),
2444                 .mode           =       0644,
2445                 .proc_handler   =       &proc_dointvec_jiffies,
2446                 .strategy       =       &sysctl_jiffies,
2447         },
2448         {
2449                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2450                 .procname       =       "gc_min_interval_ms",
2451                 .data           =       &ip6_rt_gc_min_interval,
2452                 .maxlen         =       sizeof(int),
2453                 .mode           =       0644,
2454                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2455                 .strategy       =       &sysctl_ms_jiffies,
2456         },
2457         { .ctl_name = 0 }
2458 };
2459
2460 #endif
2461
2462 void __init ip6_route_init(void)
2463 {
2464         struct proc_dir_entry *p;
2465
2466         ip6_dst_ops.kmem_cachep =
2467                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2468                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2469         fib6_init();
2470 #ifdef  CONFIG_PROC_FS
2471         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2472         if (p)
2473                 p->owner = THIS_MODULE;
2474
2475         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2476 #endif
2477 #ifdef CONFIG_XFRM
2478         xfrm6_init();
2479 #endif
2480 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2481         fib6_rules_init();
2482 #endif
2483 }
2484
2485 void ip6_route_cleanup(void)
2486 {
2487 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2488         fib6_rules_cleanup();
2489 #endif
2490 #ifdef CONFIG_PROC_FS
2491         proc_net_remove("ipv6_route");
2492         proc_net_remove("rt6_stats");
2493 #endif
2494 #ifdef CONFIG_XFRM
2495         xfrm6_fini();
2496 #endif
2497         rt6_ifdown(NULL);
2498         fib6_gc_cleanup();
2499         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2500 }