/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];	/* Devices		*/
static int maxvif;

#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)

static int mroute_do_assert;			/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;	/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;	/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;


#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};

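/* Return the pos'th MFC entry, scanning the resolved hash table first and
 * then the unresolved queue; the lock protecting the list the returned
 * entry lives on is left held for the seq_file iterator.
 */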
static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		return &vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;

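/* Handle an incoming PIM register packet: validate the register header,
 * then decapsulate the inner IPv6 packet and feed it back into the stack
 * through the pim6reg virtual device.
 */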
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* decapsulated payload is IPv6 */
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

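/* Packets transmitted on the register vif never hit the wire; they are
 * bounced up to the user-space daemon as MRT6MSG_WHOLEPKT reports.
 */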
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

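/* Allocate, register and bring up the pim6reg device. Called under RTNL. */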
static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

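/* Timer callback: if the unresolved queue is busy, retry on the next tick
 * rather than spinning; otherwise expire old unresolved entries.
 */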
static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < maxvif; vifi++) {
		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

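/* Add a virtual interface. Called under RTNL from the setsockopt path. */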
static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &vif6_table[vifi];
	struct net_device *dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > maxvif)
		maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

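/* Look up an (origin, group) pair in the resolved cache. Caller holds mrt_lock. */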
static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+ sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->dst = dst_clone(pkt->dst);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (init_net.ipv6.mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

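/* Delete an entry from the resolved cache. Called under RTNL. */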
static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

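/* Netdevice notifier: drop the vifs attached to a device when it is
 * unregistered.
 */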
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &vif6_table[0];
	for (ct = 0; ct < maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
				  0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip6_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	kmem_cache_destroy(mrt_cachep);
}

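/* Add (or update) a resolved cache entry, and replay any packets that were
 * queued while the entry was unresolved. Called under RTNL.
 */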
static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < maxvif; i++) {
		if (!(vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

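/* Claim the single mroute6 socket for the namespace; MRT6_INIT fails with
 * EADDRINUSE if a daemon already owns it.
 */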
static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(init_net.ipv6.mroute6_sk == NULL))
		init_net.ipv6.mroute6_sk = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == init_net.ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		init_net.ipv6.mroute6_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &vif6_table[vr.mifi];
		if (MIF_EXISTS(vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program joined on; otherwise the program would have to join
	 * on all interfaces. On the other hand, a multihomed host (or
	 * router, but not mrouter) cannot join on more than one interface -
	 * that would result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

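/* Reverse lookup: map a net_device back to its vif index, or -1. */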
static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;

	for (ct = maxvif - 1; ct >= 0; ct--) {
		if (vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

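/* Forward a packet along all output vifs whose TTL threshold it passes,
 * cloning it for all but the last; wrong-interface arrivals may trigger a
 * PIM assert report instead of being forwarded.
 */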
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	vif6_table[vif].pkt_in++;
	vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

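/* Fill a netlink rtmsg for an MFC entry: RTA_IIF for the parent vif and an
 * RTA_MULTIPATH nexthop list for the output vifs.
 */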
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

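/* RTM_GETROUTE support: report an MFC entry, or, if none exists yet, queue
 * a dummy packet so the daemon can resolve the route.
 */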
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}