]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/route.c
[IPV6] NDISC: Search subtrees when backtracking on receipt of redirects.
[net-next-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/if_arp.h>
39
40 #ifdef  CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #endif
44
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  RDBG() and RT6_TRACE() only produce
 * output when RT6_DEBUG is 3 or higher; at lower levels they expand to
 * nothing / an empty statement.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif

/*
 * Compile-time switch tested with #if in the policy lookup code: when
 * non-zero, routes that already have a nexthop are cloned via
 * rt6_alloc_clone(); when zero they are returned as they are.
 */
#define CLONE_OFFLINK_ROUTE 0

/* Bits accepted in the "strict" argument of rt6_score_route()/rt6_select(). */
#define RT6_SELECT_F_IFACE	(1 << 0)	/* reject routes on another interface */
#define RT6_SELECT_F_REACHABLE	(1 << 1)	/* reject routes scored unreachable */
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103                                            struct in6_addr *gwaddr, int ifindex,
104                                            unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106                                            struct in6_addr *gwaddr, int ifindex);
107 #endif
108
109 static struct dst_ops ip6_dst_ops = {
110         .family                 =       AF_INET6,
111         .protocol               =       __constant_htons(ETH_P_IPV6),
112         .gc                     =       ip6_dst_gc,
113         .gc_thresh              =       1024,
114         .check                  =       ip6_dst_check,
115         .destroy                =       ip6_dst_destroy,
116         .ifdown                 =       ip6_dst_ifdown,
117         .negative_advice        =       ip6_negative_advice,
118         .link_failure           =       ip6_link_failure,
119         .update_pmtu            =       ip6_rt_update_pmtu,
120         .entry_size             =       sizeof(struct rt6_info),
121 };
122
123 struct rt6_info ip6_null_entry = {
124         .u = {
125                 .dst = {
126                         .__refcnt       = ATOMIC_INIT(1),
127                         .__use          = 1,
128                         .dev            = &loopback_dev,
129                         .obsolete       = -1,
130                         .error          = -ENETUNREACH,
131                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
132                         .input          = ip6_pkt_discard,
133                         .output         = ip6_pkt_discard_out,
134                         .ops            = &ip6_dst_ops,
135                         .path           = (struct dst_entry*)&ip6_null_entry,
136                 }
137         },
138         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
139         .rt6i_metric    = ~(u32) 0,
140         .rt6i_ref       = ATOMIC_INIT(1),
141 };
142
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Static reject entries that exist only with multiple routing tables.
 * Both are laid out like ip6_null_entry and differ only in the error
 * they report.
 */

/* Reject entry reporting -EACCES. */
struct rt6_info ip6_prohibit_entry = {
	.u.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use	  = 1,
		.dev	  = &loopback_dev,
		.obsolete = -1,
		.error	  = -EACCES,
		.metrics  = { [RTAX_HOPLIMIT - 1] = 255, },
		.input	  = ip6_pkt_discard,
		.output	  = ip6_pkt_discard_out,
		.ops	  = &ip6_dst_ops,
		.path	  = (struct dst_entry*)&ip6_prohibit_entry,
	},
	.rt6i_flags  = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref    = ATOMIC_INIT(1),
};

/* Reject entry reporting -EINVAL. */
struct rt6_info ip6_blk_hole_entry = {
	.u.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use	  = 1,
		.dev	  = &loopback_dev,
		.obsolete = -1,
		.error	  = -EINVAL,
		.metrics  = { [RTAX_HOPLIMIT - 1] = 255, },
		.input	  = ip6_pkt_discard,
		.output	  = ip6_pkt_discard_out,
		.ops	  = &ip6_dst_ops,
		.path	  = (struct dst_entry*)&ip6_blk_hole_entry,
	},
	.rt6i_flags  = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref    = ATOMIC_INIT(1),
};

#endif
186
187 /* allocate dst with ip6_dst_ops */
188 static __inline__ struct rt6_info *ip6_dst_alloc(void)
189 {
190         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191 }
192
193 static void ip6_dst_destroy(struct dst_entry *dst)
194 {
195         struct rt6_info *rt = (struct rt6_info *)dst;
196         struct inet6_dev *idev = rt->rt6i_idev;
197
198         if (idev != NULL) {
199                 rt->rt6i_idev = NULL;
200                 in6_dev_put(idev);
201         }       
202 }
203
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205                            int how)
206 {
207         struct rt6_info *rt = (struct rt6_info *)dst;
208         struct inet6_dev *idev = rt->rt6i_idev;
209
210         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212                 if (loopback_idev != NULL) {
213                         rt->rt6i_idev = loopback_idev;
214                         in6_dev_put(idev);
215                 }
216         }
217 }
218
219 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220 {
221         return (rt->rt6i_flags & RTF_EXPIRES &&
222                 time_after(jiffies, rt->rt6i_expires));
223 }
224
225 static inline int rt6_need_strict(struct in6_addr *daddr)
226 {
227         return (ipv6_addr_type(daddr) &
228                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229 }
230
231 /*
232  *      Route lookup. Any table->tb6_lock is implied.
233  */
234
235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236                                                     int oif,
237                                                     int strict)
238 {
239         struct rt6_info *local = NULL;
240         struct rt6_info *sprt;
241
242         if (oif) {
243                 for (sprt = rt; sprt; sprt = sprt->u.next) {
244                         struct net_device *dev = sprt->rt6i_dev;
245                         if (dev->ifindex == oif)
246                                 return sprt;
247                         if (dev->flags & IFF_LOOPBACK) {
248                                 if (sprt->rt6i_idev == NULL ||
249                                     sprt->rt6i_idev->dev->ifindex != oif) {
250                                         if (strict && oif)
251                                                 continue;
252                                         if (local && (!oif || 
253                                                       local->rt6i_idev->dev->ifindex == oif))
254                                                 continue;
255                                 }
256                                 local = sprt;
257                         }
258                 }
259
260                 if (local)
261                         return local;
262
263                 if (strict)
264                         return &ip6_null_entry;
265         }
266         return rt;
267 }
268
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not
 * in a NUD_VALID state and the interface's rtr_probe_interval has
 * passed since neigh->updated, send a neighbour solicitation to the
 * solicited-node multicast address of the nexthop.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; the limit is enforced here through neigh->updated.
 * A NULL @rt, or a route without a nexthop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (!rt)
		return;

	neigh = rt->rt6i_nexthop;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check the state and the rate limit under the neighbour lock. */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies,
			 neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* The solicitation itself is sent with the lock dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
304
305 /*
306  * Default Router Selection (RFC 2461 6.3.6)
307  */
308 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309 {
310         struct net_device *dev = rt->rt6i_dev;
311         if (!oif || dev->ifindex == oif)
312                 return 2;
313         if ((dev->flags & IFF_LOOPBACK) &&
314             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315                 return 1;
316         return 0;
317 }
318
319 static int inline rt6_check_neigh(struct rt6_info *rt)
320 {
321         struct neighbour *neigh = rt->rt6i_nexthop;
322         int m = 0;
323         if (rt->rt6i_flags & RTF_NONEXTHOP ||
324             !(rt->rt6i_flags & RTF_GATEWAY))
325                 m = 1;
326         else if (neigh) {
327                 read_lock_bh(&neigh->lock);
328                 if (neigh->nud_state & NUD_VALID)
329                         m = 2;
330                 read_unlock_bh(&neigh->lock);
331         }
332         return m;
333 }
334
335 static int rt6_score_route(struct rt6_info *rt, int oif,
336                            int strict)
337 {
338         int m, n;
339                 
340         m = rt6_check_dev(rt, oif);
341         if (!m && (strict & RT6_SELECT_F_IFACE))
342                 return -1;
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345 #endif
346         n = rt6_check_neigh(rt);
347         if (n > 1)
348                 m |= 16;
349         else if (!n && strict & RT6_SELECT_F_REACHABLE)
350                 return -1;
351         return m;
352 }
353
354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355                                    int strict)
356 {
357         struct rt6_info *match = NULL, *last = NULL;
358         struct rt6_info *rt, *rt0 = *head;
359         u32 metric;
360         int mpri = -1;
361
362         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363                   __FUNCTION__, head, head ? *head : NULL, oif);
364
365         for (rt = rt0, metric = rt0->rt6i_metric;
366              rt && rt->rt6i_metric == metric && (!last || rt != rt0);
367              rt = rt->u.next) {
368                 int m;
369
370                 if (rt6_check_expired(rt))
371                         continue;
372
373                 last = rt;
374
375                 m = rt6_score_route(rt, oif, strict);
376                 if (m < 0)
377                         continue;
378
379                 if (m > mpri) {
380                         rt6_probe(match);
381                         match = rt;
382                         mpri = m;
383                 } else {
384                         rt6_probe(rt);
385                 }
386         }
387
388         if (!match &&
389             (strict & RT6_SELECT_F_REACHABLE) &&
390             last && last != rt0) {
391                 /* no entries matched; do round-robin */
392                 static DEFINE_SPINLOCK(lock);
393                 spin_lock(&lock);
394                 *head = rt0->u.next;
395                 rt0->u.next = last->u.next;
396                 last->u.next = rt0;
397                 spin_unlock(&lock);
398         }
399
400         RT6_TRACE("%s() => %p, score=%d\n",
401                   __FUNCTION__, match, mpri);
402
403         return (match ? match : &ip6_null_entry);
404 }
405
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle one Route Information option.
 *
 * @dev:    device the option arrived on; its ifindex keys the route
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address to associate with the route
 *
 * The matching route is looked up with rt6_get_route_info().  A zero
 * lifetime deletes an existing route; a non-zero lifetime creates the
 * route if it is missing or refreshes its preference flags if present.
 * A lifetime of 0xffffffff means "never expires"; any other lifetime
 * sets RTF_EXPIRES with rt6i_expires = jiffies + HZ * lifetime.
 *
 * Returns 0 when the option was processed, -EINVAL when it is too
 * short, has inconsistent length/prefix_len fields, or carries the
 * reserved route preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length: length is at most 3 and
	 * must be large enough for the number of prefix bits announced.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	/*
	 * RFC 4191 section 2.3: a Route Information option carrying the
	 * reserved preference value MUST be ignored, not treated as medium.
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* The lifetime arrives in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Zero lifetime withdraws a previously announced route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference on rt now that it has been updated. */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
483
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed route selection.
 *
 * Expects these names in the expanding function: local variables "rt"
 * and "fn", and labels "restart" and "out".
 *
 * Does nothing unless rt == &ip6_null_entry.  Otherwise it walks from
 * fn towards the root.  At each step, if the parent has a subtree that
 * is not the node we came from, that subtree is searched with @saddr;
 * if not, the walk moves to the parent itself.  It jumps to "restart"
 * as soon as the new fn has RTN_RTINFO set, and to "out" (with rt still
 * &ip6_null_entry) when fn is a top-level root.
 *
 * The subtree is read only through FIB6_SUBTREE(), the same accessor
 * used for the test, so the macro never names the raw struct member.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
501
502 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
503                                              struct flowi *fl, int flags)
504 {
505         struct fib6_node *fn;
506         struct rt6_info *rt;
507
508         read_lock_bh(&table->tb6_lock);
509         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
510 restart:
511         rt = fn->leaf;
512         rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
513         BACKTRACK(&fl->fl6_src);
514         dst_hold(&rt->u.dst);
515 out:
516         read_unlock_bh(&table->tb6_lock);
517
518         rt->u.dst.lastuse = jiffies;
519         rt->u.dst.__use++;
520
521         return rt;
522
523 }
524
525 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
526                             int oif, int strict)
527 {
528         struct flowi fl = {
529                 .oif = oif,
530                 .nl_u = {
531                         .ip6_u = {
532                                 .daddr = *daddr,
533                                 /* TODO: saddr */
534                         },
535                 },
536         };
537         struct dst_entry *dst;
538         int flags = strict ? RT6_F_STRICT : 0;
539
540         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
541         if (dst->error == 0)
542                 return (struct rt6_info *) dst;
543
544         dst_release(dst);
545
546         return NULL;
547 }
548
549 /* ip6_ins_rt is called with FREE table->tb6_lock.
550    It takes new route entry, the addition fails by any reason the
551    route is freed. In any case, if caller does not hold it, it may
552    be destroyed.
553  */
554
555 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
556 {
557         int err;
558         struct fib6_table *table;
559
560         table = rt->rt6i_table;
561         write_lock_bh(&table->tb6_lock);
562         err = fib6_add(&table->tb6_root, rt, info);
563         write_unlock_bh(&table->tb6_lock);
564
565         return err;
566 }
567
568 int ip6_ins_rt(struct rt6_info *rt)
569 {
570         return __ip6_ins_rt(rt, NULL);
571 }
572
573 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
574                                       struct in6_addr *saddr)
575 {
576         struct rt6_info *rt;
577
578         /*
579          *      Clone the route.
580          */
581
582         rt = ip6_rt_copy(ort);
583
584         if (rt) {
585                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
586                         if (rt->rt6i_dst.plen != 128 &&
587                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
588                                 rt->rt6i_flags |= RTF_ANYCAST;
589                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
590                 }
591
592                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
593                 rt->rt6i_dst.plen = 128;
594                 rt->rt6i_flags |= RTF_CACHE;
595                 rt->u.dst.flags |= DST_HOST;
596
597 #ifdef CONFIG_IPV6_SUBTREES
598                 if (rt->rt6i_src.plen && saddr) {
599                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
600                         rt->rt6i_src.plen = 128;
601                 }
602 #endif
603
604                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
605
606         }
607
608         return rt;
609 }
610
611 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
612 {
613         struct rt6_info *rt = ip6_rt_copy(ort);
614         if (rt) {
615                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
616                 rt->rt6i_dst.plen = 128;
617                 rt->rt6i_flags |= RTF_CACHE;
618                 if (rt->rt6i_flags & RTF_REJECT)
619                         rt->u.dst.error = ort->u.dst.error;
620                 rt->u.dst.flags |= DST_HOST;
621                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
622         }
623         return rt;
624 }
625
626 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
627                                             struct flowi *fl, int flags)
628 {
629         struct fib6_node *fn;
630         struct rt6_info *rt, *nrt;
631         int strict = 0;
632         int attempts = 3;
633         int err;
634         int reachable = RT6_SELECT_F_REACHABLE;
635
636         if (flags & RT6_F_STRICT)
637                 strict = RT6_SELECT_F_IFACE;
638
639 relookup:
640         read_lock_bh(&table->tb6_lock);
641
642 restart_2:
643         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
644
645 restart:
646         rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
647         BACKTRACK(&fl->fl6_src);
648         if (rt == &ip6_null_entry ||
649             rt->rt6i_flags & RTF_CACHE)
650                 goto out;
651
652         dst_hold(&rt->u.dst);
653         read_unlock_bh(&table->tb6_lock);
654
655         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
656                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
657         else {
658 #if CLONE_OFFLINK_ROUTE
659                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
660 #else
661                 goto out2;
662 #endif
663         }
664
665         dst_release(&rt->u.dst);
666         rt = nrt ? : &ip6_null_entry;
667
668         dst_hold(&rt->u.dst);
669         if (nrt) {
670                 err = ip6_ins_rt(nrt);
671                 if (!err)
672                         goto out2;
673         }
674
675         if (--attempts <= 0)
676                 goto out2;
677
678         /*
679          * Race condition! In the gap, when table->tb6_lock was
680          * released someone could insert this route.  Relookup.
681          */
682         dst_release(&rt->u.dst);
683         goto relookup;
684
685 out:
686         if (reachable) {
687                 reachable = 0;
688                 goto restart_2;
689         }
690         dst_hold(&rt->u.dst);
691         read_unlock_bh(&table->tb6_lock);
692 out2:
693         rt->u.dst.lastuse = jiffies;
694         rt->u.dst.__use++;
695
696         return rt;
697 }
698
699 void ip6_route_input(struct sk_buff *skb)
700 {
701         struct ipv6hdr *iph = skb->nh.ipv6h;
702         struct flowi fl = {
703                 .iif = skb->dev->ifindex,
704                 .nl_u = {
705                         .ip6_u = {
706                                 .daddr = iph->daddr,
707                                 .saddr = iph->saddr,
708                                 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
709                         },
710                 },
711                 .proto = iph->nexthdr,
712         };
713         int flags = 0;
714
715         if (rt6_need_strict(&iph->daddr))
716                 flags |= RT6_F_STRICT;
717
718         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
719 }
720
721 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
722                                              struct flowi *fl, int flags)
723 {
724         struct fib6_node *fn;
725         struct rt6_info *rt, *nrt;
726         int strict = 0;
727         int attempts = 3;
728         int err;
729         int reachable = RT6_SELECT_F_REACHABLE;
730
731         if (flags & RT6_F_STRICT)
732                 strict = RT6_SELECT_F_IFACE;
733
734 relookup:
735         read_lock_bh(&table->tb6_lock);
736
737 restart_2:
738         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
739
740 restart:
741         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
742         BACKTRACK(&fl->fl6_src);
743         if (rt == &ip6_null_entry ||
744             rt->rt6i_flags & RTF_CACHE)
745                 goto out;
746
747         dst_hold(&rt->u.dst);
748         read_unlock_bh(&table->tb6_lock);
749
750         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
751                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
752         else {
753 #if CLONE_OFFLINK_ROUTE
754                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
755 #else
756                 goto out2;
757 #endif
758         }
759
760         dst_release(&rt->u.dst);
761         rt = nrt ? : &ip6_null_entry;
762
763         dst_hold(&rt->u.dst);
764         if (nrt) {
765                 err = ip6_ins_rt(nrt);
766                 if (!err)
767                         goto out2;
768         }
769
770         if (--attempts <= 0)
771                 goto out2;
772
773         /*
774          * Race condition! In the gap, when table->tb6_lock was
775          * released someone could insert this route.  Relookup.
776          */
777         dst_release(&rt->u.dst);
778         goto relookup;
779
780 out:
781         if (reachable) {
782                 reachable = 0;
783                 goto restart_2;
784         }
785         dst_hold(&rt->u.dst);
786         read_unlock_bh(&table->tb6_lock);
787 out2:
788         rt->u.dst.lastuse = jiffies;
789         rt->u.dst.__use++;
790         return rt;
791 }
792
793 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
794 {
795         int flags = 0;
796
797         if (rt6_need_strict(&fl->fl6_dst))
798                 flags |= RT6_F_STRICT;
799
800         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
801 }
802
803
804 /*
805  *      Destination cache support functions
806  */
807
808 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
809 {
810         struct rt6_info *rt;
811
812         rt = (struct rt6_info *) dst;
813
814         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
815                 return dst;
816
817         return NULL;
818 }
819
820 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
821 {
822         struct rt6_info *rt = (struct rt6_info *) dst;
823
824         if (rt) {
825                 if (rt->rt6i_flags & RTF_CACHE)
826                         ip6_del_rt(rt);
827                 else
828                         dst_release(dst);
829         }
830         return NULL;
831 }
832
833 static void ip6_link_failure(struct sk_buff *skb)
834 {
835         struct rt6_info *rt;
836
837         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
838
839         rt = (struct rt6_info *) skb->dst;
840         if (rt) {
841                 if (rt->rt6i_flags&RTF_CACHE) {
842                         dst_set_expires(&rt->u.dst, 0);
843                         rt->rt6i_flags |= RTF_EXPIRES;
844                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
845                         rt->rt6i_node->fn_sernum = -1;
846         }
847 }
848
849 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
850 {
851         struct rt6_info *rt6 = (struct rt6_info*)dst;
852
853         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
854                 rt6->rt6i_flags |= RTF_MODIFIED;
855                 if (mtu < IPV6_MIN_MTU) {
856                         mtu = IPV6_MIN_MTU;
857                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
858                 }
859                 dst->metrics[RTAX_MTU-1] = mtu;
860                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
861         }
862 }
863
864 static int ipv6_get_mtu(struct net_device *dev);
865
866 static inline unsigned int ipv6_advmss(unsigned int mtu)
867 {
868         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
869
870         if (mtu < ip6_rt_min_advmss)
871                 mtu = ip6_rt_min_advmss;
872
873         /*
874          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
875          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
876          * IPV6_MAXPLEN is also valid and means: "any MSS, 
877          * rely only on pmtu discovery"
878          */
879         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
880                 mtu = IPV6_MAXPLEN;
881         return mtu;
882 }
883
884 static struct dst_entry *ndisc_dst_gc_list;
885 static DEFINE_SPINLOCK(ndisc_lock);
886
887 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
888                                   struct neighbour *neigh,
889                                   struct in6_addr *addr,
890                                   int (*output)(struct sk_buff *))
891 {
892         struct rt6_info *rt;
893         struct inet6_dev *idev = in6_dev_get(dev);
894
895         if (unlikely(idev == NULL))
896                 return NULL;
897
898         rt = ip6_dst_alloc();
899         if (unlikely(rt == NULL)) {
900                 in6_dev_put(idev);
901                 goto out;
902         }
903
904         dev_hold(dev);
905         if (neigh)
906                 neigh_hold(neigh);
907         else
908                 neigh = ndisc_get_neigh(dev, addr);
909
910         rt->rt6i_dev      = dev;
911         rt->rt6i_idev     = idev;
912         rt->rt6i_nexthop  = neigh;
913         atomic_set(&rt->u.dst.__refcnt, 1);
914         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
915         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
916         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
917         rt->u.dst.output  = output;
918
919 #if 0   /* there's no chance to use these for ndisc */
920         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
921                                 ? DST_HOST 
922                                 : 0;
923         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
924         rt->rt6i_dst.plen = 128;
925 #endif
926
927         spin_lock_bh(&ndisc_lock);
928         rt->u.dst.next = ndisc_dst_gc_list;
929         ndisc_dst_gc_list = &rt->u.dst;
930         spin_unlock_bh(&ndisc_lock);
931
932         fib6_force_start_gc();
933
934 out:
935         return (struct dst_entry *)rt;
936 }
937
938 int ndisc_dst_gc(int *more)
939 {
940         struct dst_entry *dst, *next, **pprev;
941         int freed;
942
943         next = NULL;
944         freed = 0;
945
946         spin_lock_bh(&ndisc_lock);
947         pprev = &ndisc_dst_gc_list;
948
949         while ((dst = *pprev) != NULL) {
950                 if (!atomic_read(&dst->__refcnt)) {
951                         *pprev = dst->next;
952                         dst_free(dst);
953                         freed++;
954                 } else {
955                         pprev = &dst->next;
956                         (*more)++;
957                 }
958         }
959
960         spin_unlock_bh(&ndisc_lock);
961
962         return freed;
963 }
964
965 static int ip6_dst_gc(void)
966 {
967         static unsigned expire = 30*HZ;
968         static unsigned long last_gc;
969         unsigned long now = jiffies;
970
971         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
972             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
973                 goto out;
974
975         expire++;
976         fib6_run_gc(expire);
977         last_gc = now;
978         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
979                 expire = ip6_rt_gc_timeout>>1;
980
981 out:
982         expire -= expire>>ip6_rt_gc_elasticity;
983         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
984 }
985
986 /* Clean host part of a prefix. Not necessary in radix tree,
987    but results in cleaner routing tables.
988
989    Remove it only when all the things will work!
990  */
991
992 static int ipv6_get_mtu(struct net_device *dev)
993 {
994         int mtu = IPV6_MIN_MTU;
995         struct inet6_dev *idev;
996
997         idev = in6_dev_get(dev);
998         if (idev) {
999                 mtu = idev->cnf.mtu6;
1000                 in6_dev_put(idev);
1001         }
1002         return mtu;
1003 }
1004
1005 int ipv6_get_hoplimit(struct net_device *dev)
1006 {
1007         int hoplimit = ipv6_devconf.hop_limit;
1008         struct inet6_dev *idev;
1009
1010         idev = in6_dev_get(dev);
1011         if (idev) {
1012                 hoplimit = idev->cnf.hop_limit;
1013                 in6_dev_put(idev);
1014         }
1015         return hoplimit;
1016 }
1017
1018 /*
1019  *
1020  */
1021
1022 int ip6_route_add(struct fib6_config *cfg)
1023 {
1024         int err;
1025         struct rt6_info *rt = NULL;
1026         struct net_device *dev = NULL;
1027         struct inet6_dev *idev = NULL;
1028         struct fib6_table *table;
1029         int addr_type;
1030
1031         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1032                 return -EINVAL;
1033 #ifndef CONFIG_IPV6_SUBTREES
1034         if (cfg->fc_src_len)
1035                 return -EINVAL;
1036 #endif
1037         if (cfg->fc_ifindex) {
1038                 err = -ENODEV;
1039                 dev = dev_get_by_index(cfg->fc_ifindex);
1040                 if (!dev)
1041                         goto out;
1042                 idev = in6_dev_get(dev);
1043                 if (!idev)
1044                         goto out;
1045         }
1046
1047         if (cfg->fc_metric == 0)
1048                 cfg->fc_metric = IP6_RT_PRIO_USER;
1049
1050         table = fib6_new_table(cfg->fc_table);
1051         if (table == NULL) {
1052                 err = -ENOBUFS;
1053                 goto out;
1054         }
1055
1056         rt = ip6_dst_alloc();
1057
1058         if (rt == NULL) {
1059                 err = -ENOMEM;
1060                 goto out;
1061         }
1062
1063         rt->u.dst.obsolete = -1;
1064         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1065
1066         if (cfg->fc_protocol == RTPROT_UNSPEC)
1067                 cfg->fc_protocol = RTPROT_BOOT;
1068         rt->rt6i_protocol = cfg->fc_protocol;
1069
1070         addr_type = ipv6_addr_type(&cfg->fc_dst);
1071
1072         if (addr_type & IPV6_ADDR_MULTICAST)
1073                 rt->u.dst.input = ip6_mc_input;
1074         else
1075                 rt->u.dst.input = ip6_forward;
1076
1077         rt->u.dst.output = ip6_output;
1078
1079         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1080         rt->rt6i_dst.plen = cfg->fc_dst_len;
1081         if (rt->rt6i_dst.plen == 128)
1082                rt->u.dst.flags = DST_HOST;
1083
1084 #ifdef CONFIG_IPV6_SUBTREES
1085         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1086         rt->rt6i_src.plen = cfg->fc_src_len;
1087 #endif
1088
1089         rt->rt6i_metric = cfg->fc_metric;
1090
1091         /* We cannot add true routes via loopback here,
1092            they would result in kernel looping; promote them to reject routes
1093          */
1094         if ((cfg->fc_flags & RTF_REJECT) ||
1095             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1096                 /* hold loopback dev/idev if we haven't done so. */
1097                 if (dev != &loopback_dev) {
1098                         if (dev) {
1099                                 dev_put(dev);
1100                                 in6_dev_put(idev);
1101                         }
1102                         dev = &loopback_dev;
1103                         dev_hold(dev);
1104                         idev = in6_dev_get(dev);
1105                         if (!idev) {
1106                                 err = -ENODEV;
1107                                 goto out;
1108                         }
1109                 }
1110                 rt->u.dst.output = ip6_pkt_discard_out;
1111                 rt->u.dst.input = ip6_pkt_discard;
1112                 rt->u.dst.error = -ENETUNREACH;
1113                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1114                 goto install_route;
1115         }
1116
1117         if (cfg->fc_flags & RTF_GATEWAY) {
1118                 struct in6_addr *gw_addr;
1119                 int gwa_type;
1120
1121                 gw_addr = &cfg->fc_gateway;
1122                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1123                 gwa_type = ipv6_addr_type(gw_addr);
1124
1125                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1126                         struct rt6_info *grt;
1127
1128                         /* IPv6 strictly inhibits using not link-local
1129                            addresses as nexthop address.
1130                            Otherwise, router will not able to send redirects.
1131                            It is very good, but in some (rare!) circumstances
1132                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1133                            some exceptions. --ANK
1134                          */
1135                         err = -EINVAL;
1136                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1137                                 goto out;
1138
1139                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1140
1141                         err = -EHOSTUNREACH;
1142                         if (grt == NULL)
1143                                 goto out;
1144                         if (dev) {
1145                                 if (dev != grt->rt6i_dev) {
1146                                         dst_release(&grt->u.dst);
1147                                         goto out;
1148                                 }
1149                         } else {
1150                                 dev = grt->rt6i_dev;
1151                                 idev = grt->rt6i_idev;
1152                                 dev_hold(dev);
1153                                 in6_dev_hold(grt->rt6i_idev);
1154                         }
1155                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1156                                 err = 0;
1157                         dst_release(&grt->u.dst);
1158
1159                         if (err)
1160                                 goto out;
1161                 }
1162                 err = -EINVAL;
1163                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1164                         goto out;
1165         }
1166
1167         err = -ENODEV;
1168         if (dev == NULL)
1169                 goto out;
1170
1171         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1172                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1173                 if (IS_ERR(rt->rt6i_nexthop)) {
1174                         err = PTR_ERR(rt->rt6i_nexthop);
1175                         rt->rt6i_nexthop = NULL;
1176                         goto out;
1177                 }
1178         }
1179
1180         rt->rt6i_flags = cfg->fc_flags;
1181
1182 install_route:
1183         if (cfg->fc_mx) {
1184                 struct nlattr *nla;
1185                 int remaining;
1186
1187                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1188                         int type = nla->nla_type;
1189
1190                         if (type) {
1191                                 if (type > RTAX_MAX) {
1192                                         err = -EINVAL;
1193                                         goto out;
1194                                 }
1195
1196                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1197                         }
1198                 }
1199         }
1200
1201         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1202                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1203         if (!rt->u.dst.metrics[RTAX_MTU-1])
1204                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1205         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1206                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1207         rt->u.dst.dev = dev;
1208         rt->rt6i_idev = idev;
1209         rt->rt6i_table = table;
1210         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1211
1212 out:
1213         if (dev)
1214                 dev_put(dev);
1215         if (idev)
1216                 in6_dev_put(idev);
1217         if (rt)
1218                 dst_free((struct dst_entry *) rt);
1219         return err;
1220 }
1221
1222 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1223 {
1224         int err;
1225         struct fib6_table *table;
1226
1227         if (rt == &ip6_null_entry)
1228                 return -ENOENT;
1229
1230         table = rt->rt6i_table;
1231         write_lock_bh(&table->tb6_lock);
1232
1233         err = fib6_del(rt, info);
1234         dst_release(&rt->u.dst);
1235
1236         write_unlock_bh(&table->tb6_lock);
1237
1238         return err;
1239 }
1240
1241 int ip6_del_rt(struct rt6_info *rt)
1242 {
1243         return __ip6_del_rt(rt, NULL);
1244 }
1245
1246 static int ip6_route_del(struct fib6_config *cfg)
1247 {
1248         struct fib6_table *table;
1249         struct fib6_node *fn;
1250         struct rt6_info *rt;
1251         int err = -ESRCH;
1252
1253         table = fib6_get_table(cfg->fc_table);
1254         if (table == NULL)
1255                 return err;
1256
1257         read_lock_bh(&table->tb6_lock);
1258
1259         fn = fib6_locate(&table->tb6_root,
1260                          &cfg->fc_dst, cfg->fc_dst_len,
1261                          &cfg->fc_src, cfg->fc_src_len);
1262         
1263         if (fn) {
1264                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1265                         if (cfg->fc_ifindex &&
1266                             (rt->rt6i_dev == NULL ||
1267                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1268                                 continue;
1269                         if (cfg->fc_flags & RTF_GATEWAY &&
1270                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1271                                 continue;
1272                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1273                                 continue;
1274                         dst_hold(&rt->u.dst);
1275                         read_unlock_bh(&table->tb6_lock);
1276
1277                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1278                 }
1279         }
1280         read_unlock_bh(&table->tb6_lock);
1281
1282         return err;
1283 }
1284
1285 /*
1286  *      Handle redirects
1287  */
1288 struct ip6rd_flowi {
1289         struct flowi fl;
1290         struct in6_addr gateway;
1291 };
1292
1293 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1294                                              struct flowi *fl,
1295                                              int flags)
1296 {
1297         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1298         struct rt6_info *rt;
1299         struct fib6_node *fn;
1300
1301         /*
1302          * Get the "current" route for this destination and
1303          * check if the redirect has come from approriate router.
1304          *
1305          * RFC 2461 specifies that redirects should only be
1306          * accepted if they come from the nexthop to the target.
1307          * Due to the way the routes are chosen, this notion
1308          * is a bit fuzzy and one might need to check all possible
1309          * routes.
1310          */
1311
1312         read_lock_bh(&table->tb6_lock);
1313         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1314 restart:
1315         for (rt = fn->leaf; rt; rt = rt->u.next) {
1316                 /*
1317                  * Current route is on-link; redirect is always invalid.
1318                  *
1319                  * Seems, previous statement is not true. It could
1320                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1321                  * But then router serving it might decide, that we should
1322                  * know truth 8)8) --ANK (980726).
1323                  */
1324                 if (rt6_check_expired(rt))
1325                         continue;
1326                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1327                         continue;
1328                 if (fl->oif != rt->rt6i_dev->ifindex)
1329                         continue;
1330                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1331                         continue;
1332                 break;
1333         }
1334
1335         if (!rt)
1336                 rt = &ip6_null_entry;
1337         BACKTRACK(&fl->fl6_src);
1338 out:
1339         dst_hold(&rt->u.dst);
1340
1341         read_unlock_bh(&table->tb6_lock);
1342
1343         return rt;
1344 };
1345
1346 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1347                                            struct in6_addr *src,
1348                                            struct in6_addr *gateway,
1349                                            struct net_device *dev)
1350 {
1351         struct ip6rd_flowi rdfl = {
1352                 .fl = {
1353                         .oif = dev->ifindex,
1354                         .nl_u = {
1355                                 .ip6_u = {
1356                                         .daddr = *dest,
1357                                         .saddr = *src,
1358                                 },
1359                         },
1360                 },
1361                 .gateway = *gateway,
1362         };
1363         int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
1364
1365         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1366 }
1367
1368 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1369                   struct in6_addr *saddr,
1370                   struct neighbour *neigh, u8 *lladdr, int on_link)
1371 {
1372         struct rt6_info *rt, *nrt = NULL;
1373         struct netevent_redirect netevent;
1374
1375         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1376
1377         if (rt == &ip6_null_entry) {
1378                 if (net_ratelimit())
1379                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1380                                "for redirect target\n");
1381                 goto out;
1382         }
1383
1384         /*
1385          *      We have finally decided to accept it.
1386          */
1387
1388         neigh_update(neigh, lladdr, NUD_STALE, 
1389                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1390                      NEIGH_UPDATE_F_OVERRIDE|
1391                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1392                                      NEIGH_UPDATE_F_ISROUTER))
1393                      );
1394
1395         /*
1396          * Redirect received -> path was valid.
1397          * Look, redirects are sent only in response to data packets,
1398          * so that this nexthop apparently is reachable. --ANK
1399          */
1400         dst_confirm(&rt->u.dst);
1401
1402         /* Duplicate redirect: silently ignore. */
1403         if (neigh == rt->u.dst.neighbour)
1404                 goto out;
1405
1406         nrt = ip6_rt_copy(rt);
1407         if (nrt == NULL)
1408                 goto out;
1409
1410         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1411         if (on_link)
1412                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1413
1414         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1415         nrt->rt6i_dst.plen = 128;
1416         nrt->u.dst.flags |= DST_HOST;
1417
1418         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1419         nrt->rt6i_nexthop = neigh_clone(neigh);
1420         /* Reset pmtu, it may be better */
1421         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1422         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1423
1424         if (ip6_ins_rt(nrt))
1425                 goto out;
1426
1427         netevent.old = &rt->u.dst;
1428         netevent.new = &nrt->u.dst;
1429         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1430
1431         if (rt->rt6i_flags&RTF_CACHE) {
1432                 ip6_del_rt(rt);
1433                 return;
1434         }
1435
1436 out:
1437         dst_release(&rt->u.dst);
1438         return;
1439 }
1440
1441 /*
1442  *      Handle ICMP "packet too big" messages
1443  *      i.e. Path MTU discovery
1444  */
1445
1446 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1447                         struct net_device *dev, u32 pmtu)
1448 {
1449         struct rt6_info *rt, *nrt;
1450         int allfrag = 0;
1451
1452         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1453         if (rt == NULL)
1454                 return;
1455
1456         if (pmtu >= dst_mtu(&rt->u.dst))
1457                 goto out;
1458
1459         if (pmtu < IPV6_MIN_MTU) {
1460                 /*
1461                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1462                  * MTU (1280) and a fragment header should always be included
1463                  * after a node receiving Too Big message reporting PMTU is
1464                  * less than the IPv6 Minimum Link MTU.
1465                  */
1466                 pmtu = IPV6_MIN_MTU;
1467                 allfrag = 1;
1468         }
1469
1470         /* New mtu received -> path was valid.
1471            They are sent only in response to data packets,
1472            so that this nexthop apparently is reachable. --ANK
1473          */
1474         dst_confirm(&rt->u.dst);
1475
1476         /* Host route. If it is static, it would be better
1477            not to override it, but add new one, so that
1478            when cache entry will expire old pmtu
1479            would return automatically.
1480          */
1481         if (rt->rt6i_flags & RTF_CACHE) {
1482                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1483                 if (allfrag)
1484                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1485                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1486                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1487                 goto out;
1488         }
1489
1490         /* Network route.
1491            Two cases are possible:
1492            1. It is connected route. Action: COW
1493            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1494          */
1495         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1496                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1497         else
1498                 nrt = rt6_alloc_clone(rt, daddr);
1499
1500         if (nrt) {
1501                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1502                 if (allfrag)
1503                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1504
1505                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1506                  * happened within 5 mins, the recommended timer is 10 mins.
1507                  * Here this route expiration time is set to ip6_rt_mtu_expires
1508                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1509                  * and detecting PMTU increase will be automatically happened.
1510                  */
1511                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1512                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1513
1514                 ip6_ins_rt(nrt);
1515         }
1516 out:
1517         dst_release(&rt->u.dst);
1518 }
1519
1520 /*
1521  *      Misc support functions
1522  */
1523
1524 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1525 {
1526         struct rt6_info *rt = ip6_dst_alloc();
1527
1528         if (rt) {
1529                 rt->u.dst.input = ort->u.dst.input;
1530                 rt->u.dst.output = ort->u.dst.output;
1531
1532                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1533                 rt->u.dst.dev = ort->u.dst.dev;
1534                 if (rt->u.dst.dev)
1535                         dev_hold(rt->u.dst.dev);
1536                 rt->rt6i_idev = ort->rt6i_idev;
1537                 if (rt->rt6i_idev)
1538                         in6_dev_hold(rt->rt6i_idev);
1539                 rt->u.dst.lastuse = jiffies;
1540                 rt->rt6i_expires = 0;
1541
1542                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1543                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1544                 rt->rt6i_metric = 0;
1545
1546                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1547 #ifdef CONFIG_IPV6_SUBTREES
1548                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1549 #endif
1550                 rt->rt6i_table = ort->rt6i_table;
1551         }
1552         return rt;
1553 }
1554
1555 #ifdef CONFIG_IPV6_ROUTE_INFO
1556 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1557                                            struct in6_addr *gwaddr, int ifindex)
1558 {
1559         struct fib6_node *fn;
1560         struct rt6_info *rt = NULL;
1561         struct fib6_table *table;
1562
1563         table = fib6_get_table(RT6_TABLE_INFO);
1564         if (table == NULL)
1565                 return NULL;
1566
1567         write_lock_bh(&table->tb6_lock);
1568         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1569         if (!fn)
1570                 goto out;
1571
1572         for (rt = fn->leaf; rt; rt = rt->u.next) {
1573                 if (rt->rt6i_dev->ifindex != ifindex)
1574                         continue;
1575                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1576                         continue;
1577                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1578                         continue;
1579                 dst_hold(&rt->u.dst);
1580                 break;
1581         }
1582 out:
1583         write_unlock_bh(&table->tb6_lock);
1584         return rt;
1585 }
1586
1587 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1588                                            struct in6_addr *gwaddr, int ifindex,
1589                                            unsigned pref)
1590 {
1591         struct fib6_config cfg = {
1592                 .fc_table       = RT6_TABLE_INFO,
1593                 .fc_metric      = 1024,
1594                 .fc_ifindex     = ifindex,
1595                 .fc_dst_len     = prefixlen,
1596                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1597                                   RTF_UP | RTF_PREF(pref),
1598         };
1599
1600         ipv6_addr_copy(&cfg.fc_dst, prefix);
1601         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1602
1603         /* We should treat it as a default route if prefix length is 0. */
1604         if (!prefixlen)
1605                 cfg.fc_flags |= RTF_DEFAULT;
1606
1607         ip6_route_add(&cfg);
1608
1609         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1610 }
1611 #endif
1612
1613 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1614 {       
1615         struct rt6_info *rt;
1616         struct fib6_table *table;
1617
1618         table = fib6_get_table(RT6_TABLE_DFLT);
1619         if (table == NULL)
1620                 return NULL;
1621
1622         write_lock_bh(&table->tb6_lock);
1623         for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1624                 if (dev == rt->rt6i_dev &&
1625                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1626                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1627                         break;
1628         }
1629         if (rt)
1630                 dst_hold(&rt->u.dst);
1631         write_unlock_bh(&table->tb6_lock);
1632         return rt;
1633 }
1634
1635 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1636                                      struct net_device *dev,
1637                                      unsigned int pref)
1638 {
1639         struct fib6_config cfg = {
1640                 .fc_table       = RT6_TABLE_DFLT,
1641                 .fc_metric      = 1024,
1642                 .fc_ifindex     = dev->ifindex,
1643                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1644                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1645         };
1646
1647         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1648
1649         ip6_route_add(&cfg);
1650
1651         return rt6_get_dflt_router(gwaddr, dev);
1652 }
1653
1654 void rt6_purge_dflt_routers(void)
1655 {
1656         struct rt6_info *rt;
1657         struct fib6_table *table;
1658
1659         /* NOTE: Keep consistent with rt6_get_dflt_router */
1660         table = fib6_get_table(RT6_TABLE_DFLT);
1661         if (table == NULL)
1662                 return;
1663
1664 restart:
1665         read_lock_bh(&table->tb6_lock);
1666         for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1667                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1668                         dst_hold(&rt->u.dst);
1669                         read_unlock_bh(&table->tb6_lock);
1670                         ip6_del_rt(rt);
1671                         goto restart;
1672                 }
1673         }
1674         read_unlock_bh(&table->tb6_lock);
1675 }
1676
1677 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1678                                  struct fib6_config *cfg)
1679 {
1680         memset(cfg, 0, sizeof(*cfg));
1681
1682         cfg->fc_table = RT6_TABLE_MAIN;
1683         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1684         cfg->fc_metric = rtmsg->rtmsg_metric;
1685         cfg->fc_expires = rtmsg->rtmsg_info;
1686         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1687         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1688         cfg->fc_flags = rtmsg->rtmsg_flags;
1689
1690         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1691         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1692         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1693 }
1694
1695 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1696 {
1697         struct fib6_config cfg;
1698         struct in6_rtmsg rtmsg;
1699         int err;
1700
1701         switch(cmd) {
1702         case SIOCADDRT:         /* Add a route */
1703         case SIOCDELRT:         /* Delete a route */
1704                 if (!capable(CAP_NET_ADMIN))
1705                         return -EPERM;
1706                 err = copy_from_user(&rtmsg, arg,
1707                                      sizeof(struct in6_rtmsg));
1708                 if (err)
1709                         return -EFAULT;
1710
1711                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1712
1713                 rtnl_lock();
1714                 switch (cmd) {
1715                 case SIOCADDRT:
1716                         err = ip6_route_add(&cfg);
1717                         break;
1718                 case SIOCDELRT:
1719                         err = ip6_route_del(&cfg);
1720                         break;
1721                 default:
1722                         err = -EINVAL;
1723                 }
1724                 rtnl_unlock();
1725
1726                 return err;
1727         };
1728
1729         return -EINVAL;
1730 }
1731
1732 /*
1733  *      Drop the packet on the floor
1734  */
1735
1736 static int ip6_pkt_discard(struct sk_buff *skb)
1737 {
1738         int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1739         if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1740                 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1741
1742         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1743         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1744         kfree_skb(skb);
1745         return 0;
1746 }
1747
1748 static int ip6_pkt_discard_out(struct sk_buff *skb)
1749 {
1750         skb->dev = skb->dst->dev;
1751         return ip6_pkt_discard(skb);
1752 }
1753
1754 /*
1755  *      Allocate a dst for local (unicast / anycast) address.
1756  */
1757
1758 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1759                                     const struct in6_addr *addr,
1760                                     int anycast)
1761 {
1762         struct rt6_info *rt = ip6_dst_alloc();
1763
1764         if (rt == NULL)
1765                 return ERR_PTR(-ENOMEM);
1766
1767         dev_hold(&loopback_dev);
1768         in6_dev_hold(idev);
1769
1770         rt->u.dst.flags = DST_HOST;
1771         rt->u.dst.input = ip6_input;
1772         rt->u.dst.output = ip6_output;
1773         rt->rt6i_dev = &loopback_dev;
1774         rt->rt6i_idev = idev;
1775         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1776         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1777         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1778         rt->u.dst.obsolete = -1;
1779
1780         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1781         if (anycast)
1782                 rt->rt6i_flags |= RTF_ANYCAST;
1783         else
1784                 rt->rt6i_flags |= RTF_LOCAL;
1785         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1786         if (rt->rt6i_nexthop == NULL) {
1787                 dst_free((struct dst_entry *) rt);
1788                 return ERR_PTR(-ENOMEM);
1789         }
1790
1791         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1792         rt->rt6i_dst.plen = 128;
1793         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1794
1795         atomic_set(&rt->u.dst.__refcnt, 1);
1796
1797         return rt;
1798 }
1799
1800 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1801 {
1802         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1803             rt != &ip6_null_entry) {
1804                 RT6_TRACE("deleted by ifdown %p\n", rt);
1805                 return -1;
1806         }
1807         return 0;
1808 }
1809
1810 void rt6_ifdown(struct net_device *dev)
1811 {
1812         fib6_clean_all(fib6_ifdown, 0, dev);
1813 }
1814
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
1820
1821 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1822 {
1823         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1824         struct inet6_dev *idev;
1825
1826         /* In IPv6 pmtu discovery is not optional,
1827            so that RTAX_MTU lock cannot disable it.
1828            We still use this lock to block changes
1829            caused by addrconf/ndisc.
1830         */
1831
1832         idev = __in6_dev_get(arg->dev);
1833         if (idev == NULL)
1834                 return 0;
1835
1836         /* For administrative MTU increase, there is no way to discover
1837            IPv6 PMTU increase, so PMTU increase should be updated here.
1838            Since RFC 1981 doesn't include administrative MTU increase
1839            update PMTU increase is a MUST. (i.e. jumbo frame)
1840          */
1841         /*
1842            If new MTU is less than route PMTU, this new MTU will be the
1843            lowest MTU in the path, update the route PMTU to reflect PMTU
1844            decreases; if new MTU is greater than route PMTU, and the
1845            old MTU is the lowest MTU in the path, update the route PMTU
1846            to reflect the increase. In this case if the other nodes' MTU
1847            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1848            PMTU discouvery.
1849          */
1850         if (rt->rt6i_dev == arg->dev &&
1851             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1852             (dst_mtu(&rt->u.dst) > arg->mtu ||
1853              (dst_mtu(&rt->u.dst) < arg->mtu &&
1854               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1855                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1856         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1857         return 0;
1858 }
1859
1860 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1861 {
1862         struct rt6_mtu_change_arg arg = {
1863                 .dev = dev,
1864                 .mtu = mtu,
1865         };
1866
1867         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1868 }
1869
1870 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1871         [RTA_GATEWAY]           = { .minlen = sizeof(struct in6_addr) },
1872         [RTA_OIF]               = { .type = NLA_U32 },
1873         [RTA_IIF]               = { .type = NLA_U32 },
1874         [RTA_PRIORITY]          = { .type = NLA_U32 },
1875         [RTA_METRICS]           = { .type = NLA_NESTED },
1876 };
1877
1878 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1879                               struct fib6_config *cfg)
1880 {
1881         struct rtmsg *rtm;
1882         struct nlattr *tb[RTA_MAX+1];
1883         int err;
1884
1885         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1886         if (err < 0)
1887                 goto errout;
1888
1889         err = -EINVAL;
1890         rtm = nlmsg_data(nlh);
1891         memset(cfg, 0, sizeof(*cfg));
1892
1893         cfg->fc_table = rtm->rtm_table;
1894         cfg->fc_dst_len = rtm->rtm_dst_len;
1895         cfg->fc_src_len = rtm->rtm_src_len;
1896         cfg->fc_flags = RTF_UP;
1897         cfg->fc_protocol = rtm->rtm_protocol;
1898
1899         if (rtm->rtm_type == RTN_UNREACHABLE)
1900                 cfg->fc_flags |= RTF_REJECT;
1901
1902         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1903         cfg->fc_nlinfo.nlh = nlh;
1904
1905         if (tb[RTA_GATEWAY]) {
1906                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1907                 cfg->fc_flags |= RTF_GATEWAY;
1908         }
1909
1910         if (tb[RTA_DST]) {
1911                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1912
1913                 if (nla_len(tb[RTA_DST]) < plen)
1914                         goto errout;
1915
1916                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1917         }
1918
1919         if (tb[RTA_SRC]) {
1920                 int plen = (rtm->rtm_src_len + 7) >> 3;
1921
1922                 if (nla_len(tb[RTA_SRC]) < plen)
1923                         goto errout;
1924
1925                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1926         }
1927
1928         if (tb[RTA_OIF])
1929                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1930
1931         if (tb[RTA_PRIORITY])
1932                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1933
1934         if (tb[RTA_METRICS]) {
1935                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1936                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1937         }
1938
1939         if (tb[RTA_TABLE])
1940                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1941
1942         err = 0;
1943 errout:
1944         return err;
1945 }
1946
1947 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1948 {
1949         struct fib6_config cfg;
1950         int err;
1951
1952         err = rtm_to_fib6_config(skb, nlh, &cfg);
1953         if (err < 0)
1954                 return err;
1955
1956         return ip6_route_del(&cfg);
1957 }
1958
1959 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1960 {
1961         struct fib6_config cfg;
1962         int err;
1963
1964         err = rtm_to_fib6_config(skb, nlh, &cfg);
1965         if (err < 0)
1966                 return err;
1967
1968         return ip6_route_add(&cfg);
1969 }
1970
1971 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1972                          struct in6_addr *dst, struct in6_addr *src,
1973                          int iif, int type, u32 pid, u32 seq,
1974                          int prefix, unsigned int flags)
1975 {
1976         struct rtmsg *rtm;
1977         struct nlmsghdr *nlh;
1978         struct rta_cacheinfo ci;
1979         u32 table;
1980
1981         if (prefix) {   /* user wants prefix routes only */
1982                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1983                         /* success since this is not a prefix route */
1984                         return 1;
1985                 }
1986         }
1987
1988         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1989         if (nlh == NULL)
1990                 return -ENOBUFS;
1991
1992         rtm = nlmsg_data(nlh);
1993         rtm->rtm_family = AF_INET6;
1994         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1995         rtm->rtm_src_len = rt->rt6i_src.plen;
1996         rtm->rtm_tos = 0;
1997         if (rt->rt6i_table)
1998                 table = rt->rt6i_table->tb6_id;
1999         else
2000                 table = RT6_TABLE_UNSPEC;
2001         rtm->rtm_table = table;
2002         NLA_PUT_U32(skb, RTA_TABLE, table);
2003         if (rt->rt6i_flags&RTF_REJECT)
2004                 rtm->rtm_type = RTN_UNREACHABLE;
2005         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2006                 rtm->rtm_type = RTN_LOCAL;
2007         else
2008                 rtm->rtm_type = RTN_UNICAST;
2009         rtm->rtm_flags = 0;
2010         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2011         rtm->rtm_protocol = rt->rt6i_protocol;
2012         if (rt->rt6i_flags&RTF_DYNAMIC)
2013                 rtm->rtm_protocol = RTPROT_REDIRECT;
2014         else if (rt->rt6i_flags & RTF_ADDRCONF)
2015                 rtm->rtm_protocol = RTPROT_KERNEL;
2016         else if (rt->rt6i_flags&RTF_DEFAULT)
2017                 rtm->rtm_protocol = RTPROT_RA;
2018
2019         if (rt->rt6i_flags&RTF_CACHE)
2020                 rtm->rtm_flags |= RTM_F_CLONED;
2021
2022         if (dst) {
2023                 NLA_PUT(skb, RTA_DST, 16, dst);
2024                 rtm->rtm_dst_len = 128;
2025         } else if (rtm->rtm_dst_len)
2026                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2027 #ifdef CONFIG_IPV6_SUBTREES
2028         if (src) {
2029                 NLA_PUT(skb, RTA_SRC, 16, src);
2030                 rtm->rtm_src_len = 128;
2031         } else if (rtm->rtm_src_len)
2032                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2033 #endif
2034         if (iif)
2035                 NLA_PUT_U32(skb, RTA_IIF, iif);
2036         else if (dst) {
2037                 struct in6_addr saddr_buf;
2038                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2039                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2040         }
2041
2042         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2043                 goto nla_put_failure;
2044
2045         if (rt->u.dst.neighbour)
2046                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2047
2048         if (rt->u.dst.dev)
2049                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2050
2051         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2052         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2053         if (rt->rt6i_expires)
2054                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2055         else
2056                 ci.rta_expires = 0;
2057         ci.rta_used = rt->u.dst.__use;
2058         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2059         ci.rta_error = rt->u.dst.error;
2060         ci.rta_id = 0;
2061         ci.rta_ts = 0;
2062         ci.rta_tsage = 0;
2063         NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2064
2065         return nlmsg_end(skb, nlh);
2066
2067 nla_put_failure:
2068         return nlmsg_cancel(skb, nlh);
2069 }
2070
2071 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2072 {
2073         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2074         int prefix;
2075
2076         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2077                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2078                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2079         } else
2080                 prefix = 0;
2081
2082         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2083                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2084                      prefix, NLM_F_MULTI);
2085 }
2086
2087 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2088 {
2089         struct nlattr *tb[RTA_MAX+1];
2090         struct rt6_info *rt;
2091         struct sk_buff *skb;
2092         struct rtmsg *rtm;
2093         struct flowi fl;
2094         int err, iif = 0;
2095
2096         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2097         if (err < 0)
2098                 goto errout;
2099
2100         err = -EINVAL;
2101         memset(&fl, 0, sizeof(fl));
2102
2103         if (tb[RTA_SRC]) {
2104                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2105                         goto errout;
2106
2107                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2108         }
2109
2110         if (tb[RTA_DST]) {
2111                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2112                         goto errout;
2113
2114                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2115         }
2116
2117         if (tb[RTA_IIF])
2118                 iif = nla_get_u32(tb[RTA_IIF]);
2119
2120         if (tb[RTA_OIF])
2121                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2122
2123         if (iif) {
2124                 struct net_device *dev;
2125                 dev = __dev_get_by_index(iif);
2126                 if (!dev) {
2127                         err = -ENODEV;
2128                         goto errout;
2129                 }
2130         }
2131
2132         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2133         if (skb == NULL) {
2134                 err = -ENOBUFS;
2135                 goto errout;
2136         }
2137
2138         /* Reserve room for dummy headers, this skb can pass
2139            through good chunk of routing engine.
2140          */
2141         skb->mac.raw = skb->data;
2142         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2143
2144         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2145         skb->dst = &rt->u.dst;
2146
2147         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2148                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2149                             nlh->nlmsg_seq, 0, 0);
2150         if (err < 0) {
2151                 kfree_skb(skb);
2152                 goto errout;
2153         }
2154
2155         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2156 errout:
2157         return err;
2158 }
2159
2160 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2161 {
2162         struct sk_buff *skb;
2163         u32 pid = 0, seq = 0;
2164         struct nlmsghdr *nlh = NULL;
2165         int payload = sizeof(struct rtmsg) + 256;
2166         int err = -ENOBUFS;
2167
2168         if (info) {
2169                 pid = info->pid;
2170                 nlh = info->nlh;
2171                 if (nlh)
2172                         seq = nlh->nlmsg_seq;
2173         }
2174
2175         skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2176         if (skb == NULL)
2177                 goto errout;
2178
2179         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2180         if (err < 0) {
2181                 kfree_skb(skb);
2182                 goto errout;
2183         }
2184
2185         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2186 errout:
2187         if (err < 0)
2188                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2189 }
2190
2191 /*
2192  *      /proc
2193  */
2194
2195 #ifdef CONFIG_PROC_FS
2196
2197 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2198
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach @offset */
	int len;	/* bytes written to @buffer so far */
};
2207
2208 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2209 {
2210         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2211         int i;
2212
2213         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2214                 arg->skip++;
2215                 return 0;
2216         }
2217
2218         if (arg->len >= arg->length)
2219                 return 0;
2220
2221         for (i=0; i<16; i++) {
2222                 sprintf(arg->buffer + arg->len, "%02x",
2223                         rt->rt6i_dst.addr.s6_addr[i]);
2224                 arg->len += 2;
2225         }
2226         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2227                             rt->rt6i_dst.plen);
2228
2229 #ifdef CONFIG_IPV6_SUBTREES
2230         for (i=0; i<16; i++) {
2231                 sprintf(arg->buffer + arg->len, "%02x",
2232                         rt->rt6i_src.addr.s6_addr[i]);
2233                 arg->len += 2;
2234         }
2235         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2236                             rt->rt6i_src.plen);
2237 #else
2238         sprintf(arg->buffer + arg->len,
2239                 "00000000000000000000000000000000 00 ");
2240         arg->len += 36;
2241 #endif
2242
2243         if (rt->rt6i_nexthop) {
2244                 for (i=0; i<16; i++) {
2245                         sprintf(arg->buffer + arg->len, "%02x",
2246                                 rt->rt6i_nexthop->primary_key[i]);
2247                         arg->len += 2;
2248                 }
2249         } else {
2250                 sprintf(arg->buffer + arg->len,
2251                         "00000000000000000000000000000000");
2252                 arg->len += 32;
2253         }
2254         arg->len += sprintf(arg->buffer + arg->len,
2255                             " %08x %08x %08x %08x %8s\n",
2256                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2257                             rt->u.dst.__use, rt->rt6i_flags, 
2258                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2259         return 0;
2260 }
2261
2262 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2263 {
2264         struct rt6_proc_arg arg = {
2265                 .buffer = buffer,
2266                 .offset = offset,
2267                 .length = length,
2268         };
2269
2270         fib6_clean_all(rt6_info_route, 0, &arg);
2271
2272         *start = buffer;
2273         if (offset)
2274                 *start += offset % RT6_INFO_LEN;
2275
2276         arg.len -= offset % RT6_INFO_LEN;
2277
2278         if (arg.len > length)
2279                 arg.len = length;
2280         if (arg.len < 0)
2281                 arg.len = 0;
2282
2283         return arg.len;
2284 }
2285
2286 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2287 {
2288         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2289                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2290                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2291                       rt6_stats.fib_rt_cache,
2292                       atomic_read(&ip6_dst_ops.entries),
2293                       rt6_stats.fib_discarded_routes);
2294
2295         return 0;
2296 }
2297
2298 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2299 {
2300         return single_open(file, rt6_stats_seq_show, NULL);
2301 }
2302
2303 static struct file_operations rt6_stats_seq_fops = {
2304         .owner   = THIS_MODULE,
2305         .open    = rt6_stats_seq_open,
2306         .read    = seq_read,
2307         .llseek  = seq_lseek,
2308         .release = single_release,
2309 };
2310 #endif  /* CONFIG_PROC_FS */
2311
2312 #ifdef CONFIG_SYSCTL
2313
2314 static int flush_delay;
2315
2316 static
2317 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2318                               void __user *buffer, size_t *lenp, loff_t *ppos)
2319 {
2320         if (write) {
2321                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2322                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2323                 return 0;
2324         } else
2325                 return -EINVAL;
2326 }
2327
2328 ctl_table ipv6_route_table[] = {
2329         {
2330                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2331                 .procname       =       "flush",
2332                 .data           =       &flush_delay,
2333                 .maxlen         =       sizeof(int),
2334                 .mode           =       0200,
2335                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2336         },
2337         {
2338                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2339                 .procname       =       "gc_thresh",
2340                 .data           =       &ip6_dst_ops.gc_thresh,
2341                 .maxlen         =       sizeof(int),
2342                 .mode           =       0644,
2343                 .proc_handler   =       &proc_dointvec,
2344         },
2345         {
2346                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2347                 .procname       =       "max_size",
2348                 .data           =       &ip6_rt_max_size,
2349                 .maxlen         =       sizeof(int),
2350                 .mode           =       0644,
2351                 .proc_handler   =       &proc_dointvec,
2352         },
2353         {
2354                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2355                 .procname       =       "gc_min_interval",
2356                 .data           =       &ip6_rt_gc_min_interval,
2357                 .maxlen         =       sizeof(int),
2358                 .mode           =       0644,
2359                 .proc_handler   =       &proc_dointvec_jiffies,
2360                 .strategy       =       &sysctl_jiffies,
2361         },
2362         {
2363                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2364                 .procname       =       "gc_timeout",
2365                 .data           =       &ip6_rt_gc_timeout,
2366                 .maxlen         =       sizeof(int),
2367                 .mode           =       0644,
2368                 .proc_handler   =       &proc_dointvec_jiffies,
2369                 .strategy       =       &sysctl_jiffies,
2370         },
2371         {
2372                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2373                 .procname       =       "gc_interval",
2374                 .data           =       &ip6_rt_gc_interval,
2375                 .maxlen         =       sizeof(int),
2376                 .mode           =       0644,
2377                 .proc_handler   =       &proc_dointvec_jiffies,
2378                 .strategy       =       &sysctl_jiffies,
2379         },
2380         {
2381                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2382                 .procname       =       "gc_elasticity",
2383                 .data           =       &ip6_rt_gc_elasticity,
2384                 .maxlen         =       sizeof(int),
2385                 .mode           =       0644,
2386                 .proc_handler   =       &proc_dointvec_jiffies,
2387                 .strategy       =       &sysctl_jiffies,
2388         },
2389         {
2390                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2391                 .procname       =       "mtu_expires",
2392                 .data           =       &ip6_rt_mtu_expires,
2393                 .maxlen         =       sizeof(int),
2394                 .mode           =       0644,
2395                 .proc_handler   =       &proc_dointvec_jiffies,
2396                 .strategy       =       &sysctl_jiffies,
2397         },
2398         {
2399                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2400                 .procname       =       "min_adv_mss",
2401                 .data           =       &ip6_rt_min_advmss,
2402                 .maxlen         =       sizeof(int),
2403                 .mode           =       0644,
2404                 .proc_handler   =       &proc_dointvec_jiffies,
2405                 .strategy       =       &sysctl_jiffies,
2406         },
2407         {
2408                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2409                 .procname       =       "gc_min_interval_ms",
2410                 .data           =       &ip6_rt_gc_min_interval,
2411                 .maxlen         =       sizeof(int),
2412                 .mode           =       0644,
2413                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2414                 .strategy       =       &sysctl_ms_jiffies,
2415         },
2416         { .ctl_name = 0 }
2417 };
2418
2419 #endif
2420
2421 void __init ip6_route_init(void)
2422 {
2423         struct proc_dir_entry *p;
2424
2425         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2426                                                      sizeof(struct rt6_info),
2427                                                      0, SLAB_HWCACHE_ALIGN,
2428                                                      NULL, NULL);
2429         if (!ip6_dst_ops.kmem_cachep)
2430                 panic("cannot create ip6_dst_cache");
2431
2432         fib6_init();
2433 #ifdef  CONFIG_PROC_FS
2434         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2435         if (p)
2436                 p->owner = THIS_MODULE;
2437
2438         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2439 #endif
2440 #ifdef CONFIG_XFRM
2441         xfrm6_init();
2442 #endif
2443 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2444         fib6_rules_init();
2445 #endif
2446 }
2447
2448 void ip6_route_cleanup(void)
2449 {
2450 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2451         fib6_rules_cleanup();
2452 #endif
2453 #ifdef CONFIG_PROC_FS
2454         proc_net_remove("ipv6_route");
2455         proc_net_remove("rt6_stats");
2456 #endif
2457 #ifdef CONFIG_XFRM
2458         xfrm6_fini();
2459 #endif
2460         rt6_ifdown(NULL);
2461         fib6_gc_cleanup();
2462         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2463 }