]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/ip6mr.c
ipv6: ip6mr: remove net pointer from struct mfc6_cache
[net-next-2.6.git] / net / ipv6 / ip6mr.c
1 /*
2  *      Linux IPv6 multicast routing support for BSD pim6sd
3  *      Based on net/ipv4/ipmr.c.
4  *
5  *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *              LSIIT Laboratory, Strasbourg, France
7  *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *              6WIND, Paris, France
9  *      Copyright (C)2007,2008 USAGI/WIDE Project
10  *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *      This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  *
17  */
18
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45
46 #include <net/ipv6.h>
47 #include <net/ip6_route.h>
48 #include <linux/mroute6.h>
49 #include <linux/pim.h>
50 #include <net/addrconf.h>
51 #include <linux/netfilter_ipv6.h>
52 #include <net/ip6_checksum.h>
53
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that updates are additionally serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

/* Non-zero when slot _idx of _net's vif6_table has a device attached. */
#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

/* Slab cache that backs every struct mfc6_cache used in this file. */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations of helpers that are used before their definition. */
static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
                          struct mfc6_cache *cache);
static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
                              mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct net *net, struct sk_buff *skb,
                             struct mfc6_cache *c, struct rtmsg *rtm);
static void mroute_clean_tables(struct net *net);
86
87
88 #ifdef CONFIG_PROC_FS
89
/*
 * Per-open iterator state for the MFC cache seq_file.
 */
struct ipmr_mfc_iter {
        struct seq_net_private p;
        /*
         * Which list is currently being walked: the resolved hash array,
         * the address of the unresolved queue head, or NULL when no list
         * (and therefore no lock) is active.
         */
        struct mfc6_cache **cache;
        /* Index of the hash bucket being walked in the resolved array. */
        int ct;
};
95
96
97 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
98                                            struct ipmr_mfc_iter *it, loff_t pos)
99 {
100         struct mfc6_cache *mfc;
101
102         it->cache = net->ipv6.mfc6_cache_array;
103         read_lock(&mrt_lock);
104         for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
105                 for (mfc = net->ipv6.mfc6_cache_array[it->ct];
106                      mfc; mfc = mfc->next)
107                         if (pos-- == 0)
108                                 return mfc;
109         read_unlock(&mrt_lock);
110
111         it->cache = &net->ipv6.mfc6_unres_queue;
112         spin_lock_bh(&mfc_unres_lock);
113         for (mfc = net->ipv6.mfc6_unres_queue; mfc; mfc = mfc->next)
114                 if (pos-- == 0)
115                         return mfc;
116         spin_unlock_bh(&mfc_unres_lock);
117
118         it->cache = NULL;
119         return NULL;
120 }
121
122
123
124
125 /*
126  *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
127  */
128
/*
 * Per-open iterator state for the vif table seq_file.
 */
struct ipmr_vif_iter {
        struct seq_net_private p;
        /* Index into vif6_table of the slot the iterator is positioned on. */
        int ct;
};
133
134 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
135                                             struct ipmr_vif_iter *iter,
136                                             loff_t pos)
137 {
138         for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
139                 if (!MIF_EXISTS(net, iter->ct))
140                         continue;
141                 if (pos-- == 0)
142                         return &net->ipv6.vif6_table[iter->ct];
143         }
144         return NULL;
145 }
146
147 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
148         __acquires(mrt_lock)
149 {
150         struct net *net = seq_file_net(seq);
151
152         read_lock(&mrt_lock);
153         return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
154                 : SEQ_START_TOKEN;
155 }
156
157 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
158 {
159         struct ipmr_vif_iter *iter = seq->private;
160         struct net *net = seq_file_net(seq);
161
162         ++*pos;
163         if (v == SEQ_START_TOKEN)
164                 return ip6mr_vif_seq_idx(net, iter, 0);
165
166         while (++iter->ct < net->ipv6.maxvif) {
167                 if (!MIF_EXISTS(net, iter->ct))
168                         continue;
169                 return &net->ipv6.vif6_table[iter->ct];
170         }
171         return NULL;
172 }
173
/*
 * seq_file stop hook for the vif table: drop the read lock taken in
 * ip6mr_vif_seq_start().
 */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
        __releases(mrt_lock)
{
        read_unlock(&mrt_lock);
}
179
180 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
181 {
182         struct net *net = seq_file_net(seq);
183
184         if (v == SEQ_START_TOKEN) {
185                 seq_puts(seq,
186                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
187         } else {
188                 const struct mif_device *vif = v;
189                 const char *name = vif->dev ? vif->dev->name : "none";
190
191                 seq_printf(seq,
192                            "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
193                            vif - net->ipv6.vif6_table,
194                            name, vif->bytes_in, vif->pkt_in,
195                            vif->bytes_out, vif->pkt_out,
196                            vif->flags);
197         }
198         return 0;
199 }
200
/* seq_file iterator callbacks for the vif table listing. */
static const struct seq_operations ip6mr_vif_seq_ops = {
        .start = ip6mr_vif_seq_start,
        .next  = ip6mr_vif_seq_next,
        .stop  = ip6mr_vif_seq_stop,
        .show  = ip6mr_vif_seq_show,
};
207
208 static int ip6mr_vif_open(struct inode *inode, struct file *file)
209 {
210         return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
211                             sizeof(struct ipmr_vif_iter));
212 }
213
/* File operations for the vif table file; reading goes through seq_file. */
static const struct file_operations ip6mr_vif_fops = {
        .owner   = THIS_MODULE,
        .open    = ip6mr_vif_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
221
222 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
223 {
224         struct net *net = seq_file_net(seq);
225
226         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
227                 : SEQ_START_TOKEN;
228 }
229
/*
 * seq_file next hook for the MFC cache listing.
 *
 * Advances *pos and returns the entry following v, walking the resolved
 * hash buckets first and the unresolved queue afterwards.  The lock of
 * the list being walked stays held between calls; when the resolved
 * buckets are exhausted mrt_lock is dropped and mfc_unres_lock is taken.
 * At the very end the remaining lock is released here and it->cache is
 * cleared so that ipmr_mfc_seq_stop() does not unlock again.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct mfc6_cache *mfc = v;
        struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);

        ++*pos;

        /* Coming from the header line: locate the first real entry. */
        if (v == SEQ_START_TOKEN)
                return ipmr_mfc_seq_idx(net, seq->private, 0);

        /* More entries on the current chain. */
        if (mfc->next)
                return mfc->next;

        /* End of the unresolved queue means end of the whole listing. */
        if (it->cache == &net->ipv6.mfc6_unres_queue)
                goto end_of_list;

        BUG_ON(it->cache != net->ipv6.mfc6_cache_array);

        /* Move on to the next non-empty hash bucket. */
        while (++it->ct < MFC6_LINES) {
                mfc = net->ipv6.mfc6_cache_array[it->ct];
                if (mfc)
                        return mfc;
        }

        /* exhausted cache_array, show unresolved */
        read_unlock(&mrt_lock);
        it->cache = &net->ipv6.mfc6_unres_queue;
        it->ct = 0;

        spin_lock_bh(&mfc_unres_lock);
        mfc = net->ipv6.mfc6_unres_queue;
        if (mfc)
                return mfc;

 end_of_list:
        spin_unlock_bh(&mfc_unres_lock);
        it->cache = NULL;

        return NULL;
}
271
272 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
273 {
274         struct ipmr_mfc_iter *it = seq->private;
275         struct net *net = seq_file_net(seq);
276
277         if (it->cache == &net->ipv6.mfc6_unres_queue)
278                 spin_unlock_bh(&mfc_unres_lock);
279         else if (it->cache == net->ipv6.mfc6_cache_array)
280                 read_unlock(&mrt_lock);
281 }
282
283 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
284 {
285         int n;
286         struct net *net = seq_file_net(seq);
287
288         if (v == SEQ_START_TOKEN) {
289                 seq_puts(seq,
290                          "Group                            "
291                          "Origin                           "
292                          "Iif      Pkts  Bytes     Wrong  Oifs\n");
293         } else {
294                 const struct mfc6_cache *mfc = v;
295                 const struct ipmr_mfc_iter *it = seq->private;
296
297                 seq_printf(seq, "%pI6 %pI6 %-3hd",
298                            &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
299                            mfc->mf6c_parent);
300
301                 if (it->cache != &net->ipv6.mfc6_unres_queue) {
302                         seq_printf(seq, " %8lu %8lu %8lu",
303                                    mfc->mfc_un.res.pkt,
304                                    mfc->mfc_un.res.bytes,
305                                    mfc->mfc_un.res.wrong_if);
306                         for (n = mfc->mfc_un.res.minvif;
307                              n < mfc->mfc_un.res.maxvif; n++) {
308                                 if (MIF_EXISTS(net, n) &&
309                                     mfc->mfc_un.res.ttls[n] < 255)
310                                         seq_printf(seq,
311                                                    " %2d:%-3d",
312                                                    n, mfc->mfc_un.res.ttls[n]);
313                         }
314                 } else {
315                         /* unresolved mfc_caches don't contain
316                          * pkt, bytes and wrong_if values
317                          */
318                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
319                 }
320                 seq_putc(seq, '\n');
321         }
322         return 0;
323 }
324
/* seq_file iterator callbacks for the MFC cache listing. */
static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
        .next  = ipmr_mfc_seq_next,
        .stop  = ipmr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
};
331
332 static int ipmr_mfc_open(struct inode *inode, struct file *file)
333 {
334         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
335                             sizeof(struct ipmr_mfc_iter));
336 }
337
/* File operations for the MFC cache file; reading goes through seq_file. */
static const struct file_operations ip6mr_mfc_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_mfc_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
345 #endif
346
347 #ifdef CONFIG_IPV6_PIMSM_V2
348
/*
 * Receive handler for PIM packets (installed through pim6_protocol).
 *
 * Accepts PIM Register messages that carry an encapsulated IPv6 multicast
 * packet, strips the outer headers and re-injects the inner packet via
 * netif_rx() as if it had arrived on the register vif device.  Anything
 * else is dropped.  Always returns 0; the skb is consumed either way.
 */
static int pim6_rcv(struct sk_buff *skb)
{
        struct pimreghdr *pim;
        struct ipv6hdr   *encap;
        struct net_device  *reg_dev = NULL;
        struct net *net = dev_net(skb->dev);
        int reg_vif_num = net->ipv6.mroute_reg_vif_num;

        /* Need the PIM register header plus the inner IPv6 header. */
        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
                goto drop;

        /*
         * Drop anything that is not a (non-null) Register message.  The
         * packet is also dropped when neither the checksum over just the
         * PIM header nor the checksum over the whole packet verifies.
         */
        pim = (struct pimreghdr *)skb_transport_header(skb);
        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
            (pim->flags & PIM_NULL_REGISTER) ||
            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
                             sizeof(*pim), IPPROTO_PIM,
                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
                goto drop;

        /* check if the inner packet is destined to mcast group */
        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
                                   sizeof(*pim));

        if (!ipv6_addr_is_multicast(&encap->daddr) ||
            encap->payload_len == 0 ||
            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
                goto drop;

        /* Look up the register vif device and pin it while we use it. */
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                goto drop;

        /* Make the inner IPv6 header the network header of the skb. */
        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8 *)encap - skb->data);
        skb_reset_network_header(skb);
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IPV6);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        skb_dst_drop(skb);
        reg_dev->stats.rx_bytes += skb->len;
        reg_dev->stats.rx_packets++;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
 drop:
        kfree_skb(skb);
        return 0;
}
406
/* IPv6 protocol descriptor that routes incoming PIM packets to pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
        .handler        =       pim6_rcv,
};
410
411 /* Service routines creating virtual interfaces: PIMREG */
412
413 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
414                                       struct net_device *dev)
415 {
416         struct net *net = dev_net(dev);
417
418         read_lock(&mrt_lock);
419         dev->stats.tx_bytes += skb->len;
420         dev->stats.tx_packets++;
421         ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
422                            MRT6MSG_WHOLEPKT);
423         read_unlock(&mrt_lock);
424         kfree_skb(skb);
425         return NETDEV_TX_OK;
426 }
427
/* Device operations of the register vif: only transmit is implemented. */
static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};
431
432 static void reg_vif_setup(struct net_device *dev)
433 {
434         dev->type               = ARPHRD_PIMREG;
435         dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
436         dev->flags              = IFF_NOARP;
437         dev->netdev_ops         = &reg_vif_netdev_ops;
438         dev->destructor         = free_netdev;
439         dev->features           |= NETIF_F_NETNS_LOCAL;
440 }
441
442 static struct net_device *ip6mr_reg_vif(struct net *net)
443 {
444         struct net_device *dev;
445
446         dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
447         if (dev == NULL)
448                 return NULL;
449
450         dev_net_set(dev, net);
451
452         if (register_netdevice(dev)) {
453                 free_netdev(dev);
454                 return NULL;
455         }
456         dev->iflink = 0;
457
458         if (dev_open(dev))
459                 goto failure;
460
461         dev_hold(dev);
462         return dev;
463
464 failure:
465         /* allow the register to be completed before unregistering. */
466         rtnl_unlock();
467         rtnl_lock();
468
469         unregister_netdevice(dev);
470         return NULL;
471 }
472 #endif
473
474 /*
475  *      Delete a VIF entry
476  */
477
478 static int mif6_delete(struct net *net, int vifi, struct list_head *head)
479 {
480         struct mif_device *v;
481         struct net_device *dev;
482         struct inet6_dev *in6_dev;
483         if (vifi < 0 || vifi >= net->ipv6.maxvif)
484                 return -EADDRNOTAVAIL;
485
486         v = &net->ipv6.vif6_table[vifi];
487
488         write_lock_bh(&mrt_lock);
489         dev = v->dev;
490         v->dev = NULL;
491
492         if (!dev) {
493                 write_unlock_bh(&mrt_lock);
494                 return -EADDRNOTAVAIL;
495         }
496
497 #ifdef CONFIG_IPV6_PIMSM_V2
498         if (vifi == net->ipv6.mroute_reg_vif_num)
499                 net->ipv6.mroute_reg_vif_num = -1;
500 #endif
501
502         if (vifi + 1 == net->ipv6.maxvif) {
503                 int tmp;
504                 for (tmp = vifi - 1; tmp >= 0; tmp--) {
505                         if (MIF_EXISTS(net, tmp))
506                                 break;
507                 }
508                 net->ipv6.maxvif = tmp + 1;
509         }
510
511         write_unlock_bh(&mrt_lock);
512
513         dev_set_allmulti(dev, -1);
514
515         in6_dev = __in6_dev_get(dev);
516         if (in6_dev)
517                 in6_dev->cnf.mc_forwarding--;
518
519         if (v->flags & MIFF_REGISTER)
520                 unregister_netdevice_queue(dev, head);
521
522         dev_put(dev);
523         return 0;
524 }
525
/* Return cache entry c to the mrt_cachep slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
        kmem_cache_free(mrt_cachep, c);
}
530
531 /* Destroy an unresolved cache entry, killing queued skbs
532    and reporting error to netlink readers.
533  */
534
535 static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c)
536 {
537         struct sk_buff *skb;
538
539         atomic_dec(&net->ipv6.cache_resolve_queue_len);
540
541         while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
542                 if (ipv6_hdr(skb)->version == 0) {
543                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
544                         nlh->nlmsg_type = NLMSG_ERROR;
545                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
546                         skb_trim(skb, nlh->nlmsg_len);
547                         ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
548                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
549                 } else
550                         kfree_skb(skb);
551         }
552
553         ip6mr_cache_free(c);
554 }
555
556
557 /* Timer process for all the unresolved queue. */
558
559 static void ipmr_do_expire_process(struct net *net)
560 {
561         unsigned long now = jiffies;
562         unsigned long expires = 10 * HZ;
563         struct mfc6_cache *c, **cp;
564
565         cp = &net->ipv6.mfc6_unres_queue;
566
567         while ((c = *cp) != NULL) {
568                 if (time_after(c->mfc_un.unres.expires, now)) {
569                         /* not yet... */
570                         unsigned long interval = c->mfc_un.unres.expires - now;
571                         if (interval < expires)
572                                 expires = interval;
573                         cp = &c->next;
574                         continue;
575                 }
576
577                 *cp = c->next;
578                 ip6mr_destroy_unres(net, c);
579         }
580
581         if (net->ipv6.mfc6_unres_queue != NULL)
582                 mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires);
583 }
584
585 static void ipmr_expire_process(unsigned long arg)
586 {
587         struct net *net = (struct net *)arg;
588
589         if (!spin_trylock(&mfc_unres_lock)) {
590                 mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + 1);
591                 return;
592         }
593
594         if (net->ipv6.mfc6_unres_queue != NULL)
595                 ipmr_do_expire_process(net);
596
597         spin_unlock(&mfc_unres_lock);
598 }
599
600 /* Fill oifs list. It is called under write locked mrt_lock. */
601
602 static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
603                                     unsigned char *ttls)
604 {
605         int vifi;
606
607         cache->mfc_un.res.minvif = MAXMIFS;
608         cache->mfc_un.res.maxvif = 0;
609         memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
610
611         for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
612                 if (MIF_EXISTS(net, vifi) &&
613                     ttls[vifi] && ttls[vifi] < 255) {
614                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
615                         if (cache->mfc_un.res.minvif > vifi)
616                                 cache->mfc_un.res.minvif = vifi;
617                         if (cache->mfc_un.res.maxvif <= vifi)
618                                 cache->mfc_un.res.maxvif = vifi + 1;
619                 }
620         }
621 }
622
/*
 * Install a new multicast interface described by vifc in slot
 * vifc->mif6c_mifi of the namespace's vif table.
 *
 * mrtsock is non-zero when the request comes from the multicast routing
 * socket; otherwise the entry is additionally flagged VIFF_STATIC.
 *
 * Returns 0 on success, -EADDRINUSE if the slot (or, for a register vif,
 * the register vif) is already taken, -ENOBUFS if the register device
 * could not be created, -EADDRNOTAVAIL if the physical interface does not
 * exist, -EINVAL for unsupported flags, or the error from
 * dev_set_allmulti().
 *
 * NOTE(review): vifi is used to index vif6_table without a range check
 * here; presumably the caller validates it - confirm.
 */
static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
{
        int vifi = vifc->mif6c_mifi;
        struct mif_device *v = &net->ipv6.vif6_table[vifi];
        struct net_device *dev;
        struct inet6_dev *in6_dev;
        int err;

        /* Is vif busy ? */
        if (MIF_EXISTS(net, vifi))
                return -EADDRINUSE;

        switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
        case MIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (net->ipv6.mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ip6mr_reg_vif(net);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        /* Undo the creation done by ip6mr_reg_vif(). */
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
#endif
        case 0:
                /* Ordinary vif on top of an existing interface. */
                dev = dev_get_by_index(net, vifc->mif6c_pifi);
                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        in6_dev = __in6_dev_get(dev);
        if (in6_dev)
                in6_dev->cnf.mc_forwarding++;

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit = vifc->vifc_rate_limit;
        v->flags = vifc->mif6c_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags & MIFF_REGISTER)
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        /* Setting v->dev is what makes MIF_EXISTS() true for this slot. */
        v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
        if (v->flags & MIFF_REGISTER)
                net->ipv6.mroute_reg_vif_num = vifi;
#endif
        if (vifi + 1 > net->ipv6.maxvif)
                net->ipv6.maxvif = vifi + 1;
        write_unlock_bh(&mrt_lock);
        return 0;
}
701
702 static struct mfc6_cache *ip6mr_cache_find(struct net *net,
703                                            struct in6_addr *origin,
704                                            struct in6_addr *mcastgrp)
705 {
706         int line = MFC6_HASH(mcastgrp, origin);
707         struct mfc6_cache *c;
708
709         for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
710                 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
711                     ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
712                         break;
713         }
714         return c;
715 }
716
717 /*
718  *      Allocate a multicast cache entry
719  */
720 static struct mfc6_cache *ip6mr_cache_alloc(void)
721 {
722         struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
723         if (c == NULL)
724                 return NULL;
725         c->mfc_un.res.minvif = MAXMIFS;
726         return c;
727 }
728
729 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
730 {
731         struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
732         if (c == NULL)
733                 return NULL;
734         skb_queue_head_init(&c->mfc_un.unres.unresolved);
735         c->mfc_un.unres.expires = jiffies + 10 * HZ;
736         return c;
737 }
738
739 /*
740  *      A cache entry has gone into a resolved state from queued
741  */
742
743 static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
744                                 struct mfc6_cache *c)
745 {
746         struct sk_buff *skb;
747
748         /*
749          *      Play the pending entries through our router
750          */
751
752         while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
753                 if (ipv6_hdr(skb)->version == 0) {
754                         int err;
755                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
756
757                         if (ip6mr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
758                                 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
759                         } else {
760                                 nlh->nlmsg_type = NLMSG_ERROR;
761                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
762                                 skb_trim(skb, nlh->nlmsg_len);
763                                 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
764                         }
765                         err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
766                 } else
767                         ip6_mr_forward(net, skb, c);
768         }
769 }
770
/*
 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 *
 *      Builds a struct mrt6msg describing pkt (message type assert, vif
 *      mifi, source and destination address) and queues it on the
 *      multicast routing socket of net.  For MRT6MSG_WHOLEPKT the message
 *      header is prepended to a copy of the whole packet.
 *
 *      Returns -ENOBUFS if no skb could be allocated, -EINVAL if no
 *      routing socket is registered, otherwise the result of
 *      sock_queue_rcv_skb().  pkt itself is not consumed.
 */

static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
                              int assert)
{
        struct sk_buff *skb;
        struct mrt6msg *msg;
        int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
        /* Whole-packet report: copy pkt with room for our header in front. */
        if (assert == MRT6MSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
                                                +sizeof(*msg));
        else
#endif
                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

        /* I suppose that internal messages
         * do not require checksums */

        skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
        if (assert == MRT6MSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix length etc.
                   And all this only to mangle msg->im6_msgtype and
                   to set msg->im6_mbz to "mbz" :-)
                 */
                skb_push(skb, -skb_network_offset(pkt));

                skb_push(skb, sizeof(*msg));
                skb_reset_transport_header(skb);
                msg = (struct mrt6msg *)skb_transport_header(skb);
                msg->im6_mbz = 0;
                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
                /* The mifi argument is not used here; the register vif is. */
                msg->im6_mif = net->ipv6.mroute_reg_vif_num;
                msg->im6_pad = 0;
                ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
                ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else
#endif
        {
        /*
         *      Copy the IP header
         */

        skb_put(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

        /*
         *      Add our header
         */
        skb_put(skb, sizeof(*msg));
        skb_reset_transport_header(skb);
        msg = (struct mrt6msg *)skb_transport_header(skb);

        msg->im6_mbz = 0;
        msg->im6_msgtype = assert;
        msg->im6_mif = mifi;
        msg->im6_pad = 0;
        ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
        ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }

        /* Nobody to report to: discard the message we just built. */
        if (net->ipv6.mroute6_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to user space multicast routing algorithms
         */
        ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}
867
/*
 *      Queue a packet for resolution. It gets locked cache entry!
 *
 *      Finds or creates the unresolved cache entry for the packet's
 *      (source, destination) pair and appends skb to its queue.  A new
 *      entry is only created while fewer than 10 are pending, and its
 *      creation is reported to the daemon as MRT6MSG_NOCACHE.
 *
 *      Returns 0 if skb was queued.  Otherwise skb is freed and a
 *      negative error is returned: -ENOBUFS if no entry could be created
 *      or its queue is full, or the error from ip6mr_cache_report().
 */

static int
ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
{
        int err;
        struct mfc6_cache *c;

        spin_lock_bh(&mfc_unres_lock);
        /* Is there already a pending entry for this source/group? */
        for (c = net->ipv6.mfc6_unres_queue; c; c = c->next) {
                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
                    (c = ip6mr_cache_alloc_unres()) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mf6c_parent = -1;
                c->mf6c_origin = ipv6_hdr(skb)->saddr;
                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

                /*
                 *      Reflect first query at pim6sd
                 */
                err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ip6mr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                /* Link the new entry at the head of the unresolved queue. */
                atomic_inc(&net->ipv6.cache_resolve_queue_len);
                c->next = net->ipv6.mfc6_unres_queue;
                net->ipv6.mfc6_unres_queue = c;

                /* Runs an expiry pass, which also (re)arms the timer. */
                ipmr_do_expire_process(net);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}
941
942 /*
943  *      MFC6 cache manipulation by user space
944  */
945
946 static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
947 {
948         int line;
949         struct mfc6_cache *c, **cp;
950
951         line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
952
953         for (cp = &net->ipv6.mfc6_cache_array[line];
954              (c = *cp) != NULL; cp = &c->next) {
955                 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
956                     ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
957                         write_lock_bh(&mrt_lock);
958                         *cp = c->next;
959                         write_unlock_bh(&mrt_lock);
960
961                         ip6mr_cache_free(c);
962                         return 0;
963                 }
964         }
965         return -ENOENT;
966 }
967
968 static int ip6mr_device_event(struct notifier_block *this,
969                               unsigned long event, void *ptr)
970 {
971         struct net_device *dev = ptr;
972         struct net *net = dev_net(dev);
973         struct mif_device *v;
974         int ct;
975         LIST_HEAD(list);
976
977         if (event != NETDEV_UNREGISTER)
978                 return NOTIFY_DONE;
979
980         v = &net->ipv6.vif6_table[0];
981         for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
982                 if (v->dev == dev)
983                         mif6_delete(net, ct, &list);
984         }
985         unregister_netdevice_many(&list);
986
987         return NOTIFY_DONE;
988 }
989
990 static struct notifier_block ip6_mr_notifier = {
991         .notifier_call = ip6mr_device_event
992 };
993
994 /*
995  *      Setup for IP multicast routing
996  */
997
998 static int __net_init ip6mr_net_init(struct net *net)
999 {
1000         int err = 0;
1001         net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
1002                                        GFP_KERNEL);
1003         if (!net->ipv6.vif6_table) {
1004                 err = -ENOMEM;
1005                 goto fail;
1006         }
1007
1008         /* Forwarding cache */
1009         net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
1010                                              sizeof(struct mfc6_cache *),
1011                                              GFP_KERNEL);
1012         if (!net->ipv6.mfc6_cache_array) {
1013                 err = -ENOMEM;
1014                 goto fail_mfc6_cache;
1015         }
1016
1017         setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process,
1018                     (unsigned long)net);
1019
1020 #ifdef CONFIG_IPV6_PIMSM_V2
1021         net->ipv6.mroute_reg_vif_num = -1;
1022 #endif
1023
1024 #ifdef CONFIG_PROC_FS
1025         err = -ENOMEM;
1026         if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1027                 goto proc_vif_fail;
1028         if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1029                 goto proc_cache_fail;
1030 #endif
1031         return 0;
1032
1033 #ifdef CONFIG_PROC_FS
1034 proc_cache_fail:
1035         proc_net_remove(net, "ip6_mr_vif");
1036 proc_vif_fail:
1037         kfree(net->ipv6.mfc6_cache_array);
1038 #endif
1039 fail_mfc6_cache:
1040         kfree(net->ipv6.vif6_table);
1041 fail:
1042         return err;
1043 }
1044
1045 static void __net_exit ip6mr_net_exit(struct net *net)
1046 {
1047 #ifdef CONFIG_PROC_FS
1048         proc_net_remove(net, "ip6_mr_cache");
1049         proc_net_remove(net, "ip6_mr_vif");
1050 #endif
1051         del_timer(&net->ipv6.ipmr_expire_timer);
1052         mroute_clean_tables(net);
1053         kfree(net->ipv6.mfc6_cache_array);
1054         kfree(net->ipv6.vif6_table);
1055 }
1056
1057 static struct pernet_operations ip6mr_net_ops = {
1058         .init = ip6mr_net_init,
1059         .exit = ip6mr_net_exit,
1060 };
1061
1062 int __init ip6_mr_init(void)
1063 {
1064         int err;
1065
1066         mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1067                                        sizeof(struct mfc6_cache),
1068                                        0, SLAB_HWCACHE_ALIGN,
1069                                        NULL);
1070         if (!mrt_cachep)
1071                 return -ENOMEM;
1072
1073         err = register_pernet_subsys(&ip6mr_net_ops);
1074         if (err)
1075                 goto reg_pernet_fail;
1076
1077         err = register_netdevice_notifier(&ip6_mr_notifier);
1078         if (err)
1079                 goto reg_notif_fail;
1080 #ifdef CONFIG_IPV6_PIMSM_V2
1081         if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1082                 printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1083                 err = -EAGAIN;
1084                 goto add_proto_fail;
1085         }
1086 #endif
1087         return 0;
1088 #ifdef CONFIG_IPV6_PIMSM_V2
1089 add_proto_fail:
1090         unregister_netdevice_notifier(&ip6_mr_notifier);
1091 #endif
1092 reg_notif_fail:
1093         unregister_pernet_subsys(&ip6mr_net_ops);
1094 reg_pernet_fail:
1095         kmem_cache_destroy(mrt_cachep);
1096         return err;
1097 }
1098
1099 void ip6_mr_cleanup(void)
1100 {
1101         unregister_netdevice_notifier(&ip6_mr_notifier);
1102         unregister_pernet_subsys(&ip6mr_net_ops);
1103         kmem_cache_destroy(mrt_cachep);
1104 }
1105
1106 static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1107 {
1108         int line;
1109         struct mfc6_cache *uc, *c, **cp;
1110         unsigned char ttls[MAXMIFS];
1111         int i;
1112
1113         if (mfc->mf6cc_parent >= MAXMIFS)
1114                 return -ENFILE;
1115
1116         memset(ttls, 255, MAXMIFS);
1117         for (i = 0; i < MAXMIFS; i++) {
1118                 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1119                         ttls[i] = 1;
1120
1121         }
1122
1123         line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1124
1125         for (cp = &net->ipv6.mfc6_cache_array[line];
1126              (c = *cp) != NULL; cp = &c->next) {
1127                 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1128                     ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1129                         break;
1130         }
1131
1132         if (c != NULL) {
1133                 write_lock_bh(&mrt_lock);
1134                 c->mf6c_parent = mfc->mf6cc_parent;
1135                 ip6mr_update_thresholds(net, c, ttls);
1136                 if (!mrtsock)
1137                         c->mfc_flags |= MFC_STATIC;
1138                 write_unlock_bh(&mrt_lock);
1139                 return 0;
1140         }
1141
1142         if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1143                 return -EINVAL;
1144
1145         c = ip6mr_cache_alloc();
1146         if (c == NULL)
1147                 return -ENOMEM;
1148
1149         c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1150         c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1151         c->mf6c_parent = mfc->mf6cc_parent;
1152         ip6mr_update_thresholds(net, c, ttls);
1153         if (!mrtsock)
1154                 c->mfc_flags |= MFC_STATIC;
1155
1156         write_lock_bh(&mrt_lock);
1157         c->next = net->ipv6.mfc6_cache_array[line];
1158         net->ipv6.mfc6_cache_array[line] = c;
1159         write_unlock_bh(&mrt_lock);
1160
1161         /*
1162          *      Check to see if we resolved a queued list. If so we
1163          *      need to send on the frames and tidy up.
1164          */
1165         spin_lock_bh(&mfc_unres_lock);
1166         for (cp = &net->ipv6.mfc6_unres_queue; (uc = *cp) != NULL;
1167              cp = &uc->next) {
1168                 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1169                     ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1170                         *cp = uc->next;
1171                         atomic_dec(&net->ipv6.cache_resolve_queue_len);
1172                         break;
1173                 }
1174         }
1175         if (net->ipv6.mfc6_unres_queue == NULL)
1176                 del_timer(&net->ipv6.ipmr_expire_timer);
1177         spin_unlock_bh(&mfc_unres_lock);
1178
1179         if (uc) {
1180                 ip6mr_cache_resolve(net, uc, c);
1181                 ip6mr_cache_free(uc);
1182         }
1183         return 0;
1184 }
1185
1186 /*
1187  *      Close the multicast socket, and clear the vif tables etc
1188  */
1189
1190 static void mroute_clean_tables(struct net *net)
1191 {
1192         int i;
1193         LIST_HEAD(list);
1194
1195         /*
1196          *      Shut down all active vif entries
1197          */
1198         for (i = 0; i < net->ipv6.maxvif; i++) {
1199                 if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
1200                         mif6_delete(net, i, &list);
1201         }
1202         unregister_netdevice_many(&list);
1203
1204         /*
1205          *      Wipe the cache
1206          */
1207         for (i = 0; i < MFC6_LINES; i++) {
1208                 struct mfc6_cache *c, **cp;
1209
1210                 cp = &net->ipv6.mfc6_cache_array[i];
1211                 while ((c = *cp) != NULL) {
1212                         if (c->mfc_flags & MFC_STATIC) {
1213                                 cp = &c->next;
1214                                 continue;
1215                         }
1216                         write_lock_bh(&mrt_lock);
1217                         *cp = c->next;
1218                         write_unlock_bh(&mrt_lock);
1219
1220                         ip6mr_cache_free(c);
1221                 }
1222         }
1223
1224         if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
1225                 struct mfc6_cache *c, **cp;
1226
1227                 spin_lock_bh(&mfc_unres_lock);
1228                 cp = &net->ipv6.mfc6_unres_queue;
1229                 while ((c = *cp) != NULL) {
1230                         *cp = c->next;
1231                         ip6mr_destroy_unres(net, c);
1232                 }
1233                 spin_unlock_bh(&mfc_unres_lock);
1234         }
1235 }
1236
1237 static int ip6mr_sk_init(struct sock *sk)
1238 {
1239         int err = 0;
1240         struct net *net = sock_net(sk);
1241
1242         rtnl_lock();
1243         write_lock_bh(&mrt_lock);
1244         if (likely(net->ipv6.mroute6_sk == NULL)) {
1245                 net->ipv6.mroute6_sk = sk;
1246                 net->ipv6.devconf_all->mc_forwarding++;
1247         }
1248         else
1249                 err = -EADDRINUSE;
1250         write_unlock_bh(&mrt_lock);
1251
1252         rtnl_unlock();
1253
1254         return err;
1255 }
1256
1257 int ip6mr_sk_done(struct sock *sk)
1258 {
1259         int err = 0;
1260         struct net *net = sock_net(sk);
1261
1262         rtnl_lock();
1263         if (sk == net->ipv6.mroute6_sk) {
1264                 write_lock_bh(&mrt_lock);
1265                 net->ipv6.mroute6_sk = NULL;
1266                 net->ipv6.devconf_all->mc_forwarding--;
1267                 write_unlock_bh(&mrt_lock);
1268
1269                 mroute_clean_tables(net);
1270         } else
1271                 err = -EACCES;
1272         rtnl_unlock();
1273
1274         return err;
1275 }
1276
1277 /*
1278  *      Socket options and virtual interface manipulation. The whole
1279  *      virtual interface system is a complete heap, but unfortunately
1280  *      that's how BSD mrouted happens to think. Maybe one day with a proper
1281  *      MOSPF/PIM router set up we can clean this up.
1282  */
1283
1284 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1285 {
1286         int ret;
1287         struct mif6ctl vif;
1288         struct mf6cctl mfc;
1289         mifi_t mifi;
1290         struct net *net = sock_net(sk);
1291
1292         if (optname != MRT6_INIT) {
1293                 if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
1294                         return -EACCES;
1295         }
1296
1297         switch (optname) {
1298         case MRT6_INIT:
1299                 if (sk->sk_type != SOCK_RAW ||
1300                     inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1301                         return -EOPNOTSUPP;
1302                 if (optlen < sizeof(int))
1303                         return -EINVAL;
1304
1305                 return ip6mr_sk_init(sk);
1306
1307         case MRT6_DONE:
1308                 return ip6mr_sk_done(sk);
1309
1310         case MRT6_ADD_MIF:
1311                 if (optlen < sizeof(vif))
1312                         return -EINVAL;
1313                 if (copy_from_user(&vif, optval, sizeof(vif)))
1314                         return -EFAULT;
1315                 if (vif.mif6c_mifi >= MAXMIFS)
1316                         return -ENFILE;
1317                 rtnl_lock();
1318                 ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
1319                 rtnl_unlock();
1320                 return ret;
1321
1322         case MRT6_DEL_MIF:
1323                 if (optlen < sizeof(mifi_t))
1324                         return -EINVAL;
1325                 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1326                         return -EFAULT;
1327                 rtnl_lock();
1328                 ret = mif6_delete(net, mifi, NULL);
1329                 rtnl_unlock();
1330                 return ret;
1331
1332         /*
1333          *      Manipulate the forwarding caches. These live
1334          *      in a sort of kernel/user symbiosis.
1335          */
1336         case MRT6_ADD_MFC:
1337         case MRT6_DEL_MFC:
1338                 if (optlen < sizeof(mfc))
1339                         return -EINVAL;
1340                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1341                         return -EFAULT;
1342                 rtnl_lock();
1343                 if (optname == MRT6_DEL_MFC)
1344                         ret = ip6mr_mfc_delete(net, &mfc);
1345                 else
1346                         ret = ip6mr_mfc_add(net, &mfc,
1347                                             sk == net->ipv6.mroute6_sk);
1348                 rtnl_unlock();
1349                 return ret;
1350
1351         /*
1352          *      Control PIM assert (to activate pim will activate assert)
1353          */
1354         case MRT6_ASSERT:
1355         {
1356                 int v;
1357                 if (get_user(v, (int __user *)optval))
1358                         return -EFAULT;
1359                 net->ipv6.mroute_do_assert = !!v;
1360                 return 0;
1361         }
1362
1363 #ifdef CONFIG_IPV6_PIMSM_V2
1364         case MRT6_PIM:
1365         {
1366                 int v;
1367                 if (get_user(v, (int __user *)optval))
1368                         return -EFAULT;
1369                 v = !!v;
1370                 rtnl_lock();
1371                 ret = 0;
1372                 if (v != net->ipv6.mroute_do_pim) {
1373                         net->ipv6.mroute_do_pim = v;
1374                         net->ipv6.mroute_do_assert = v;
1375                 }
1376                 rtnl_unlock();
1377                 return ret;
1378         }
1379
1380 #endif
1381         /*
1382          *      Spurious command, or MRT6_VERSION which you cannot
1383          *      set.
1384          */
1385         default:
1386                 return -ENOPROTOOPT;
1387         }
1388 }
1389
1390 /*
1391  *      Getsock opt support for the multicast routing system.
1392  */
1393
1394 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1395                           int __user *optlen)
1396 {
1397         int olr;
1398         int val;
1399         struct net *net = sock_net(sk);
1400
1401         switch (optname) {
1402         case MRT6_VERSION:
1403                 val = 0x0305;
1404                 break;
1405 #ifdef CONFIG_IPV6_PIMSM_V2
1406         case MRT6_PIM:
1407                 val = net->ipv6.mroute_do_pim;
1408                 break;
1409 #endif
1410         case MRT6_ASSERT:
1411                 val = net->ipv6.mroute_do_assert;
1412                 break;
1413         default:
1414                 return -ENOPROTOOPT;
1415         }
1416
1417         if (get_user(olr, optlen))
1418                 return -EFAULT;
1419
1420         olr = min_t(int, olr, sizeof(int));
1421         if (olr < 0)
1422                 return -EINVAL;
1423
1424         if (put_user(olr, optlen))
1425                 return -EFAULT;
1426         if (copy_to_user(optval, &val, olr))
1427                 return -EFAULT;
1428         return 0;
1429 }
1430
1431 /*
1432  *      The IP multicast ioctl support routines.
1433  */
1434
1435 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1436 {
1437         struct sioc_sg_req6 sr;
1438         struct sioc_mif_req6 vr;
1439         struct mif_device *vif;
1440         struct mfc6_cache *c;
1441         struct net *net = sock_net(sk);
1442
1443         switch (cmd) {
1444         case SIOCGETMIFCNT_IN6:
1445                 if (copy_from_user(&vr, arg, sizeof(vr)))
1446                         return -EFAULT;
1447                 if (vr.mifi >= net->ipv6.maxvif)
1448                         return -EINVAL;
1449                 read_lock(&mrt_lock);
1450                 vif = &net->ipv6.vif6_table[vr.mifi];
1451                 if (MIF_EXISTS(net, vr.mifi)) {
1452                         vr.icount = vif->pkt_in;
1453                         vr.ocount = vif->pkt_out;
1454                         vr.ibytes = vif->bytes_in;
1455                         vr.obytes = vif->bytes_out;
1456                         read_unlock(&mrt_lock);
1457
1458                         if (copy_to_user(arg, &vr, sizeof(vr)))
1459                                 return -EFAULT;
1460                         return 0;
1461                 }
1462                 read_unlock(&mrt_lock);
1463                 return -EADDRNOTAVAIL;
1464         case SIOCGETSGCNT_IN6:
1465                 if (copy_from_user(&sr, arg, sizeof(sr)))
1466                         return -EFAULT;
1467
1468                 read_lock(&mrt_lock);
1469                 c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1470                 if (c) {
1471                         sr.pktcnt = c->mfc_un.res.pkt;
1472                         sr.bytecnt = c->mfc_un.res.bytes;
1473                         sr.wrong_if = c->mfc_un.res.wrong_if;
1474                         read_unlock(&mrt_lock);
1475
1476                         if (copy_to_user(arg, &sr, sizeof(sr)))
1477                                 return -EFAULT;
1478                         return 0;
1479                 }
1480                 read_unlock(&mrt_lock);
1481                 return -EADDRNOTAVAIL;
1482         default:
1483                 return -ENOIOCTLCMD;
1484         }
1485 }
1486
1487
1488 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1489 {
1490         IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1491                          IPSTATS_MIB_OUTFORWDATAGRAMS);
1492         return dst_output(skb);
1493 }
1494
1495 /*
1496  *      Processing handlers for ip6mr_forward
1497  */
1498
1499 static int ip6mr_forward2(struct net *net, struct sk_buff *skb,
1500                           struct mfc6_cache *c, int vifi)
1501 {
1502         struct ipv6hdr *ipv6h;
1503         struct mif_device *vif = &net->ipv6.vif6_table[vifi];
1504         struct net_device *dev;
1505         struct dst_entry *dst;
1506         struct flowi fl;
1507
1508         if (vif->dev == NULL)
1509                 goto out_free;
1510
1511 #ifdef CONFIG_IPV6_PIMSM_V2
1512         if (vif->flags & MIFF_REGISTER) {
1513                 vif->pkt_out++;
1514                 vif->bytes_out += skb->len;
1515                 vif->dev->stats.tx_bytes += skb->len;
1516                 vif->dev->stats.tx_packets++;
1517                 ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
1518                 goto out_free;
1519         }
1520 #endif
1521
1522         ipv6h = ipv6_hdr(skb);
1523
1524         fl = (struct flowi) {
1525                 .oif = vif->link,
1526                 .nl_u = { .ip6_u =
1527                                 { .daddr = ipv6h->daddr, }
1528                 }
1529         };
1530
1531         dst = ip6_route_output(net, NULL, &fl);
1532         if (!dst)
1533                 goto out_free;
1534
1535         skb_dst_drop(skb);
1536         skb_dst_set(skb, dst);
1537
1538         /*
1539          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1540          * not only before forwarding, but after forwarding on all output
1541          * interfaces. It is clear, if mrouter runs a multicasting
1542          * program, it should receive packets not depending to what interface
1543          * program is joined.
1544          * If we will not make it, the program will have to join on all
1545          * interfaces. On the other hand, multihoming host (or router, but
1546          * not mrouter) cannot join to more than one interface - it will
1547          * result in receiving multiple packets.
1548          */
1549         dev = vif->dev;
1550         skb->dev = dev;
1551         vif->pkt_out++;
1552         vif->bytes_out += skb->len;
1553
1554         /* We are about to write */
1555         /* XXX: extension headers? */
1556         if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1557                 goto out_free;
1558
1559         ipv6h = ipv6_hdr(skb);
1560         ipv6h->hop_limit--;
1561
1562         IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1563
1564         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1565                        ip6mr_forward2_finish);
1566
1567 out_free:
1568         kfree_skb(skb);
1569         return 0;
1570 }
1571
1572 static int ip6mr_find_vif(struct net_device *dev)
1573 {
1574         struct net *net = dev_net(dev);
1575         int ct;
1576         for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
1577                 if (net->ipv6.vif6_table[ct].dev == dev)
1578                         break;
1579         }
1580         return ct;
1581 }
1582
1583 static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
1584                           struct mfc6_cache *cache)
1585 {
1586         int psend = -1;
1587         int vif, ct;
1588
1589         vif = cache->mf6c_parent;
1590         cache->mfc_un.res.pkt++;
1591         cache->mfc_un.res.bytes += skb->len;
1592
1593         /*
1594          * Wrong interface: drop packet and (maybe) send PIM assert.
1595          */
1596         if (net->ipv6.vif6_table[vif].dev != skb->dev) {
1597                 int true_vifi;
1598
1599                 cache->mfc_un.res.wrong_if++;
1600                 true_vifi = ip6mr_find_vif(skb->dev);
1601
1602                 if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
1603                     /* pimsm uses asserts, when switching from RPT to SPT,
1604                        so that we cannot check that packet arrived on an oif.
1605                        It is bad, but otherwise we would need to move pretty
1606                        large chunk of pimd to kernel. Ough... --ANK
1607                      */
1608                     (net->ipv6.mroute_do_pim ||
1609                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1610                     time_after(jiffies,
1611                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1612                         cache->mfc_un.res.last_assert = jiffies;
1613                         ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
1614                 }
1615                 goto dont_forward;
1616         }
1617
1618         net->ipv6.vif6_table[vif].pkt_in++;
1619         net->ipv6.vif6_table[vif].bytes_in += skb->len;
1620
1621         /*
1622          *      Forward the frame
1623          */
1624         for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1625                 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1626                         if (psend != -1) {
1627                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1628                                 if (skb2)
1629                                         ip6mr_forward2(net, skb2, cache, psend);
1630                         }
1631                         psend = ct;
1632                 }
1633         }
1634         if (psend != -1) {
1635                 ip6mr_forward2(net, skb, cache, psend);
1636                 return 0;
1637         }
1638
1639 dont_forward:
1640         kfree_skb(skb);
1641         return 0;
1642 }
1643
1644
1645 /*
1646  *      Multicast packets for forwarding arrive here
1647  */
1648
1649 int ip6_mr_input(struct sk_buff *skb)
1650 {
1651         struct mfc6_cache *cache;
1652         struct net *net = dev_net(skb->dev);
1653
1654         read_lock(&mrt_lock);
1655         cache = ip6mr_cache_find(net,
1656                                  &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1657
1658         /*
1659          *      No usable cache entry
1660          */
1661         if (cache == NULL) {
1662                 int vif;
1663
1664                 vif = ip6mr_find_vif(skb->dev);
1665                 if (vif >= 0) {
1666                         int err = ip6mr_cache_unresolved(net, vif, skb);
1667                         read_unlock(&mrt_lock);
1668
1669                         return err;
1670                 }
1671                 read_unlock(&mrt_lock);
1672                 kfree_skb(skb);
1673                 return -ENODEV;
1674         }
1675
1676         ip6_mr_forward(net, skb, cache);
1677
1678         read_unlock(&mrt_lock);
1679
1680         return 0;
1681 }
1682
1683
1684 static int
1685 ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c,
1686                   struct rtmsg *rtm)
1687 {
1688         int ct;
1689         struct rtnexthop *nhp;
1690         u8 *b = skb_tail_pointer(skb);
1691         struct rtattr *mp_head;
1692
1693         /* If cache is unresolved, don't try to parse IIF and OIF */
1694         if (c->mf6c_parent > MAXMIFS)
1695                 return -ENOENT;
1696
1697         if (MIF_EXISTS(net, c->mf6c_parent))
1698                 RTA_PUT(skb, RTA_IIF, 4, &net->ipv6.vif6_table[c->mf6c_parent].dev->ifindex);
1699
1700         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1701
1702         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1703                 if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1704                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1705                                 goto rtattr_failure;
1706                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1707                         nhp->rtnh_flags = 0;
1708                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1709                         nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
1710                         nhp->rtnh_len = sizeof(*nhp);
1711                 }
1712         }
1713         mp_head->rta_type = RTA_MULTIPATH;
1714         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1715         rtm->rtm_type = RTN_MULTICAST;
1716         return 1;
1717
1718 rtattr_failure:
1719         nlmsg_trim(skb, b);
1720         return -EMSGSIZE;
1721 }
1722
1723 int ip6mr_get_route(struct net *net,
1724                     struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1725 {
1726         int err;
1727         struct mfc6_cache *cache;
1728         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1729
1730         read_lock(&mrt_lock);
1731         cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1732
1733         if (!cache) {
1734                 struct sk_buff *skb2;
1735                 struct ipv6hdr *iph;
1736                 struct net_device *dev;
1737                 int vif;
1738
1739                 if (nowait) {
1740                         read_unlock(&mrt_lock);
1741                         return -EAGAIN;
1742                 }
1743
1744                 dev = skb->dev;
1745                 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1746                         read_unlock(&mrt_lock);
1747                         return -ENODEV;
1748                 }
1749
1750                 /* really correct? */
1751                 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1752                 if (!skb2) {
1753                         read_unlock(&mrt_lock);
1754                         return -ENOMEM;
1755                 }
1756
1757                 skb_reset_transport_header(skb2);
1758
1759                 skb_put(skb2, sizeof(struct ipv6hdr));
1760                 skb_reset_network_header(skb2);
1761
1762                 iph = ipv6_hdr(skb2);
1763                 iph->version = 0;
1764                 iph->priority = 0;
1765                 iph->flow_lbl[0] = 0;
1766                 iph->flow_lbl[1] = 0;
1767                 iph->flow_lbl[2] = 0;
1768                 iph->payload_len = 0;
1769                 iph->nexthdr = IPPROTO_NONE;
1770                 iph->hop_limit = 0;
1771                 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1772                 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1773
1774                 err = ip6mr_cache_unresolved(net, vif, skb2);
1775                 read_unlock(&mrt_lock);
1776
1777                 return err;
1778         }
1779
1780         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1781                 cache->mfc_flags |= MFC_NOTIFY;
1782
1783         err = ip6mr_fill_mroute(net, skb, cache, rtm);
1784         read_unlock(&mrt_lock);
1785         return err;
1786 }
1787