]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/ip6mr.c
netns: ip6mr: dynamically allocates vif6_table
[net-next-2.6.git] / net / ipv6 / ip6mr.c
CommitLineData
7bc570c8
YH
1/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
7bc570c8
YH
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
7bc570c8
YH
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
7bc570c8 35#include <linux/init.h>
7bc570c8
YH
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
7bc570c8 39#include <net/raw.h>
7bc570c8
YH
40#include <linux/notifier.h>
41#include <linux/if_arp.h>
7bc570c8
YH
42#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
14fb64e1 48#include <linux/pim.h>
7bc570c8
YH
49#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
7bc570c8
YH
52/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
4e16880c 62#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
7bc570c8 63
14fb64e1
YH
64static int mroute_do_assert; /* Set in PIM assert */
65#ifdef CONFIG_IPV6_PIMSM_V2
66static int mroute_do_pim;
67#else
68#define mroute_do_pim 0
69#endif
70
6ac7eb08 71static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */
7bc570c8
YH
72
73static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
74static atomic_t cache_resolve_queue_len; /* Size of unresolved */
75
76/* Special spinlock for queue of unresolved entries */
77static DEFINE_SPINLOCK(mfc_unres_lock);
78
79/* We return to original Alan's scheme. Hash table of resolved
80 entries is changed only in process context and protected
81 with weak lock mrt_lock. Queue of unresolved entries is protected
82 with strong spinlock mfc_unres_lock.
83
84 In this case data path is free of exclusive locks at all.
85 */
86
87static struct kmem_cache *mrt_cachep __read_mostly;
88
89static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
6ac7eb08 90static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
7bc570c8
YH
91static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
92
14fb64e1
YH
93#ifdef CONFIG_IPV6_PIMSM_V2
94static struct inet6_protocol pim6_protocol;
95#endif
96
7bc570c8
YH
97static struct timer_list ipmr_expire_timer;
98
99
100#ifdef CONFIG_PROC_FS
101
/* Iterator state for /proc/net/ip6_mr_cache: which list is being walked
 * (the resolved hash table or the unresolved queue) and the bucket. */
struct ipmr_mfc_iter {
	struct mfc6_cache **cache;	/* mfc6_cache_array, &mfc_unres_queue, or NULL */
	int ct;				/* current hash bucket */
};


/*
 * Return the pos'th mfc6_cache entry, scanning the resolved hash table
 * first and then the unresolved queue.  On success the lock protecting
 * the list that holds the returned entry is left held; it->cache records
 * which one, and ipmr_mfc_seq_stop() releases it accordingly.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
130
131
132
133
134/*
135 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
136 */
137
138struct ipmr_vif_iter {
139 int ct;
140};
141
142static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
143 loff_t pos)
144{
4e16880c
BT
145 for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
146 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8
YH
147 continue;
148 if (pos-- == 0)
4e16880c 149 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
150 }
151 return NULL;
152}
153
154static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
155 __acquires(mrt_lock)
156{
157 read_lock(&mrt_lock);
158 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
159 : SEQ_START_TOKEN);
160}
161
162static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
163{
164 struct ipmr_vif_iter *iter = seq->private;
165
166 ++*pos;
167 if (v == SEQ_START_TOKEN)
168 return ip6mr_vif_seq_idx(iter, 0);
169
4e16880c
BT
170 while (++iter->ct < init_net.ipv6.maxvif) {
171 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8 172 continue;
4e16880c 173 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
174 }
175 return NULL;
176}
177
/* seq_file .stop: drop the lock taken in ip6mr_vif_seq_start(). */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
183
184static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
185{
186 if (v == SEQ_START_TOKEN) {
187 seq_puts(seq,
188 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
189 } else {
190 const struct mif_device *vif = v;
191 const char *name = vif->dev ? vif->dev->name : "none";
192
193 seq_printf(seq,
d430a227 194 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
4e16880c 195 vif - init_net.ipv6.vif6_table,
7bc570c8
YH
196 name, vif->bytes_in, vif->pkt_in,
197 vif->bytes_out, vif->pkt_out,
198 vif->flags);
199 }
200 return 0;
201}
202
/* seq_file operations for /proc/net/ip6_mr_vif */
static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next = ip6mr_vif_seq_next,
	.stop = ip6mr_vif_seq_stop,
	.show = ip6mr_vif_seq_show,
};

/* open: allocate a private ipmr_vif_iter for this reader. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip6mr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
223
224static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
225{
226 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
227 : SEQ_START_TOKEN);
228}
229
/*
 * seq_file .next for /proc/net/ip6_mr_cache.  Advances within the
 * current chain, then across hash buckets, then hands over from the
 * read-locked resolved table to the spin-locked unresolved queue.
 * The lock held on return must match it->cache so that
 * ipmr_mfc_seq_stop() releases the right one.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	/* already in the unresolved queue: end of the whole walk */
	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
270
/*
 * seq_file .stop: release whichever lock the iterator still holds, as
 * recorded in it->cache (NULL means both were already dropped).
 */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}
280
/* seq_file .show: header line, or one line per (S,G) cache entry. */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group "
			 "Origin "
			 "Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* list only mifs that are forwarded to (ttl < 255) */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
321
/* seq_file operations for /proc/net/ip6_mr_cache */
static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next = ipmr_mfc_seq_next,
	.stop = ipmr_mfc_seq_stop,
	.show = ipmr_mfc_seq_show,
};

/* open: allocate a private ipmr_mfc_iter for this reader. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
342#endif
343
14fb64e1
YH
344#ifdef CONFIG_IPV6_PIMSM_V2
345static int reg_vif_num = -1;
346
347static int pim6_rcv(struct sk_buff *skb)
348{
349 struct pimreghdr *pim;
350 struct ipv6hdr *encap;
351 struct net_device *reg_dev = NULL;
352
353 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
354 goto drop;
355
356 pim = (struct pimreghdr *)skb_transport_header(skb);
357 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
358 (pim->flags & PIM_NULL_REGISTER) ||
359 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
ec6b486f 360 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
14fb64e1
YH
361 goto drop;
362
363 /* check if the inner packet is destined to mcast group */
364 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
365 sizeof(*pim));
366
367 if (!ipv6_addr_is_multicast(&encap->daddr) ||
368 encap->payload_len == 0 ||
369 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
370 goto drop;
371
372 read_lock(&mrt_lock);
373 if (reg_vif_num >= 0)
4e16880c 374 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
14fb64e1
YH
375 if (reg_dev)
376 dev_hold(reg_dev);
377 read_unlock(&mrt_lock);
378
379 if (reg_dev == NULL)
380 goto drop;
381
382 skb->mac_header = skb->network_header;
383 skb_pull(skb, (u8 *)encap - skb->data);
384 skb_reset_network_header(skb);
385 skb->dev = reg_dev;
386 skb->protocol = htons(ETH_P_IP);
387 skb->ip_summed = 0;
388 skb->pkt_type = PACKET_HOST;
389 dst_release(skb->dst);
dc58c78c
PE
390 reg_dev->stats.rx_bytes += skb->len;
391 reg_dev->stats.rx_packets++;
14fb64e1
YH
392 skb->dst = NULL;
393 nf_reset(skb);
394 netif_rx(skb);
395 dev_put(reg_dev);
396 return 0;
397 drop:
398 kfree_skb(skb);
399 return 0;
400}
401
402static struct inet6_protocol pim6_protocol = {
403 .handler = pim6_rcv,
404};
405
/* Service routines creating virtual interfaces: PIMREG */

/*
 * Transmit hook of the pim6reg pseudo-device: every packet routed to it
 * is bounced up to the user-space PIM daemon as an MRT6MSG_WHOLEPKT
 * report and then dropped.
 */
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);	/* stabilizes reg_vif_num */
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
422
14fb64e1
YH
423static void reg_vif_setup(struct net_device *dev)
424{
425 dev->type = ARPHRD_PIMREG;
426 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
427 dev->flags = IFF_NOARP;
007c3838 428 dev->netdev_ops = &reg_vif_netdev_ops;
14fb64e1
YH
429 dev->destructor = free_netdev;
430}
431
/*
 * Create, register and open the "pim6reg" device.  Returns the device
 * with a reference held, or NULL on failure.  Runs under RTNL; on
 * dev_open() failure RTNL is dropped and retaken so the pending
 * registration can complete before unregister_netdevice().
 */
static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);	/* not registered: plain free */
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif
461
7bc570c8
YH
/*
 *	Delete a VIF entry
 */

/*
 * Remove mif 'vifi'.  v->dev is cleared under the write lock so data
 * path readers never see a half-torn-down entry; maxvif is shrunk when
 * the highest slot is freed; the device reference is dropped last.
 */
static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;
	if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv6.vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	/* freed the top slot: scan down for the new highest used index */
	if (vifi + 1 == init_net.ipv6.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv6.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	/* register vifs were created by us, unregister them too */
	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}
508
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		/* queued RTM_GETROUTE requests carry a zero IPv6 version
		 * field; answer them with NLMSG_ERROR(-ETIMEDOUT) */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}
533
534
/* Single timer process for all the unresolved queue. */

/*
 * Walk the unresolved queue, destroying entries whose resolution window
 * has expired, and re-arm the timer for the earliest remaining expiry.
 * Caller holds mfc_unres_lock.
 */
static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;	/* upper bound on re-arm delay */
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}
562
563static void ipmr_expire_process(unsigned long dummy)
564{
565 if (!spin_trylock(&mfc_unres_lock)) {
566 mod_timer(&ipmr_expire_timer, jiffies + 1);
567 return;
568 }
569
570 if (atomic_read(&cache_resolve_queue_len))
571 ipmr_do_expire_process(dummy);
572
573 spin_unlock(&mfc_unres_lock);
574}
575
576/* Fill oifs list. It is called under write locked mrt_lock. */
577
578static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
579{
580 int vifi;
581
6ac7eb08 582 cache->mfc_un.res.minvif = MAXMIFS;
7bc570c8 583 cache->mfc_un.res.maxvif = 0;
6ac7eb08 584 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
7bc570c8 585
4e16880c
BT
586 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
587 if (MIF_EXISTS(&init_net, vifi) &&
588 ttls[vifi] && ttls[vifi] < 255) {
7bc570c8
YH
589 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
590 if (cache->mfc_un.res.minvif > vifi)
591 cache->mfc_un.res.minvif = vifi;
592 if (cache->mfc_un.res.maxvif <= vifi)
593 cache->mfc_un.res.maxvif = vifi + 1;
594 }
595 }
596}
597
/*
 * Add mif 'vifc->mif6c_mifi'.  The device is acquired and configured
 * without the lock; only the final publication of v->dev (and the
 * maxvif/reg_vif_num update) happens under the write lock.
 * 'mrtsock' is true when the entry comes from the mroute socket
 * (otherwise it is marked static and survives socket close).
 */
static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > init_net.ipv6.maxvif)
		init_net.ipv6.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
671
672static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
673{
674 int line = MFC6_HASH(mcastgrp, origin);
675 struct mfc6_cache *c;
676
677 for (c = mfc6_cache_array[line]; c; c = c->next) {
678 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
679 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
680 break;
681 }
682 return c;
683}
684
685/*
686 * Allocate a multicast cache entry
687 */
688static struct mfc6_cache *ip6mr_cache_alloc(void)
689{
36cbac59 690 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
7bc570c8
YH
691 if (c == NULL)
692 return NULL;
6ac7eb08 693 c->mfc_un.res.minvif = MAXMIFS;
7bc570c8
YH
694 return c;
695}
696
697static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
698{
36cbac59 699 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
7bc570c8
YH
700 if (c == NULL)
701 return NULL;
7bc570c8
YH
702 skb_queue_head_init(&c->mfc_un.unres.unresolved);
703 c->mfc_un.unres.expires = jiffies + 10 * HZ;
704 return c;
705}
706
/*
 *	A cache entry has gone into a resolved state from queued
 */

/*
 * Replay the skbs parked on unresolved entry 'uc' through the freshly
 * resolved entry 'c': queued netlink route requests (zero version
 * field) are answered, data packets are forwarded.
 */
static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				/* route didn't fit: report EMSGSIZE instead */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}
737
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* WHOLEPKT: copy the entire packet with headroom for the msg header */
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
					   +sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->dst = dst_clone(pkt->dst);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		skb_pull(skb, sizeof(struct ipv6hdr));
	}

	/* no daemon listening: nothing to report to */
	if (init_net.ipv6.mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
835
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

/*
 * No route for this (src, grp) yet: park the skb on an unresolved
 * cache entry — creating one and sending an MRT6MSG_NOCACHE report to
 * the daemon if this is the first packet of the flow — until user
 * space installs a route via MRT6_ADD_MFC.
 */
static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		/* cap the unresolved queue at 10 flows */
		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		/* at most 4 packets queued per unresolved flow */
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
908
909/*
910 * MFC6 cache manipulation by user space
911 */
912
913static int ip6mr_mfc_delete(struct mf6cctl *mfc)
914{
915 int line;
916 struct mfc6_cache *c, **cp;
917
918 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
919
920 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
921 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
922 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
923 write_lock_bh(&mrt_lock);
924 *cp = c->next;
925 write_unlock_bh(&mrt_lock);
926
927 kmem_cache_free(mrt_cachep, c);
928 return 0;
929 }
930 }
931 return -ENOENT;
932}
933
934static int ip6mr_device_event(struct notifier_block *this,
935 unsigned long event, void *ptr)
936{
937 struct net_device *dev = ptr;
938 struct mif_device *v;
939 int ct;
940
721499e8 941 if (!net_eq(dev_net(dev), &init_net))
7bc570c8
YH
942 return NOTIFY_DONE;
943
944 if (event != NETDEV_UNREGISTER)
945 return NOTIFY_DONE;
946
4e16880c
BT
947 v = &init_net.ipv6.vif6_table[0];
948 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
7bc570c8
YH
949 if (v->dev == dev)
950 mif6_delete(ct);
951 }
952 return NOTIFY_DONE;
953}
954
955static struct notifier_block ip6_mr_notifier = {
956 .notifier_call = ip6mr_device_event
957};
958
959/*
960 * Setup for IP multicast routing
961 */
962
4e16880c
BT
963static int __net_init ip6mr_net_init(struct net *net)
964{
965 int err = 0;
966
967 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
968 GFP_KERNEL);
969 if (!net->ipv6.vif6_table) {
970 err = -ENOMEM;
971 goto fail;
972 }
973fail:
974 return err;
975}
976
977static void __net_exit ip6mr_net_exit(struct net *net)
978{
979 kfree(net->ipv6.vif6_table);
980}
981
982static struct pernet_operations ip6mr_net_ops = {
983 .init = ip6mr_net_init,
984 .exit = ip6mr_net_exit,
985};
986
/*
 * Module init: slab cache, pernet mif tables, expiry timer, netdevice
 * notifier and the two /proc entries.  Error paths unwind in exact
 * reverse order of setup.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
				  0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip6_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1028
623d1a1a
WC
/* Tear down everything ip6_mr_init() set up, in reverse order. */
void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
7bc570c8
YH
1040
/*
 * Install or update a resolved (S,G) route (MRT6_ADD_MFC).  If an
 * unresolved entry exists for the same flow, its queued packets are
 * replayed through the new route and the entry is freed.
 */
static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	/* oif set membership translates into ttl 1, others stay 255 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		/* route exists: update it in place under the write lock */
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			/* last unresolved entry gone: stop the timer */
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
1115
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

/*
 * Flush all state installed via the mroute socket: every non-static
 * mif, every non-static resolved cache entry, and the entire
 * unresolved queue.
 */
static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv6.maxvif; i++) {
		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* unlink under the write lock, free outside it */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			/* the lock is dropped around ip6mr_destroy_unres() */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1168
1169static int ip6mr_sk_init(struct sock *sk)
1170{
1171 int err = 0;
1172
1173 rtnl_lock();
1174 write_lock_bh(&mrt_lock);
bd91b8bf
BT
1175 if (likely(init_net.ipv6.mroute6_sk == NULL))
1176 init_net.ipv6.mroute6_sk = sk;
7bc570c8
YH
1177 else
1178 err = -EADDRINUSE;
1179 write_unlock_bh(&mrt_lock);
1180
1181 rtnl_unlock();
1182
1183 return err;
1184}
1185
1186int ip6mr_sk_done(struct sock *sk)
1187{
1188 int err = 0;
1189
1190 rtnl_lock();
bd91b8bf 1191 if (sk == init_net.ipv6.mroute6_sk) {
7bc570c8 1192 write_lock_bh(&mrt_lock);
bd91b8bf 1193 init_net.ipv6.mroute6_sk = NULL;
7bc570c8
YH
1194 write_unlock_bh(&mrt_lock);
1195
1196 mroute_clean_tables(sk);
1197 } else
1198 err = -EACCES;
1199 rtnl_unlock();
1200
1201 return err;
1202}
1203
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

/*
 *	Handle the MRT6_* setsockopt() commands from the user-space
 *	multicast routing daemon (e.g. pim6sd).
 *
 *	Every command except MRT6_INIT requires the caller to either be the
 *	registered mroute socket or to hold CAP_NET_ADMIN.
 *	Returns 0 or a negative errno.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		/* Only a raw ICMPv6 socket may become the mroute socket. */
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		/* Second argument: is the request coming from the daemon? */
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			/* Enabling PIM implies enabling asserts, and
			 * (un)registers the PIM protocol handler.
			 */
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1322
1323/*
1324 * Getsock opt support for the multicast routing system.
1325 */
1326
1327int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1328 int __user *optlen)
1329{
1330 int olr;
1331 int val;
1332
1333 switch (optname) {
1334 case MRT6_VERSION:
1335 val = 0x0305;
1336 break;
14fb64e1
YH
1337#ifdef CONFIG_IPV6_PIMSM_V2
1338 case MRT6_PIM:
1339 val = mroute_do_pim;
1340 break;
1341#endif
1342 case MRT6_ASSERT:
1343 val = mroute_do_assert;
1344 break;
7bc570c8
YH
1345 default:
1346 return -ENOPROTOOPT;
1347 }
1348
1349 if (get_user(olr, optlen))
1350 return -EFAULT;
1351
1352 olr = min_t(int, olr, sizeof(int));
1353 if (olr < 0)
1354 return -EINVAL;
1355
1356 if (put_user(olr, optlen))
1357 return -EFAULT;
1358 if (copy_to_user(optval, &val, olr))
1359 return -EFAULT;
1360 return 0;
1361}
1362
/*
 *	The IP multicast ioctl support routines.
 */

/*
 *	SIOCGETMIFCNT_IN6 / SIOCGETSGCNT_IN6: report per-mif and per-(S,G)
 *	packet/byte counters to user space.  Returns 0, -EFAULT, -EINVAL
 *	for an out-of-range mif, -EADDRNOTAVAIL for a missing entry, or
 *	-ENOIOCTLCMD for unknown commands.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= init_net.ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(&init_net, vr.mifi)) {
			/* Snapshot the counters under the lock, copy to user
			 * space only after dropping it.
			 */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1417
1418
1419static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1420{
483a47d2
DL
1421 IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1422 IPSTATS_MIB_OUTFORWDATAGRAMS);
7bc570c8
YH
1423 return dst_output(skb);
1424}
1425
/*
 *	Processing handlers for ip6mr_forward
 */

/*
 *	Transmit one copy of a multicast packet on mif @vifi.
 *
 *	Consumes @skb in every path (queued to the netfilter/output path,
 *	handed to the daemon, or freed).  Always returns 0.  Runs under
 *	mrt_lock held for read (see ip6_mr_forward / ip6_mr_input).
 */
static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* Register mif: don't transmit -- report the whole packet to the
	 * daemon instead, accounting it as if it had been sent.
	 */
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route the copy out of the mif's link. */
	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	/* NOTE(review): ip6_route_output() usually reports failure via
	 * dst->error rather than returning NULL -- confirm the NULL check
	 * alone is sufficient here.
	 */
	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* skb_cow() may have reallocated -- re-fetch the header before
	 * decrementing the hop limit for this forwarding step.
	 */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1502
1503static int ip6mr_find_vif(struct net_device *dev)
1504{
1505 int ct;
4e16880c
BT
1506 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1507 if (init_net.ipv6.vif6_table[ct].dev == dev)
7bc570c8
YH
1508 break;
1509 }
1510 return ct;
1511}
1512
/*
 *	Forward a multicast packet according to its resolved cache entry.
 *
 *	Updates the cache and mif statistics, drops (and optionally sends a
 *	PIM assert for) packets arriving on the wrong interface, and sends
 *	one copy per eligible outgoing mif.  Consumes @skb; always returns
 *	0.  Runs under mrt_lock held for read (see ip6_mr_input).
 */
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    /* Rate-limit asserts to one per MFC_ASSERT_THRESH. */
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	init_net.ipv6.vif6_table[vif].pkt_in++;
	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			/* Clone for every mif except the last one, which gets
			 * the original skb (avoids one extra clone).
			 */
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
1571
1572
1573/*
1574 * Multicast packets for forwarding arrive here
1575 */
1576
1577int ip6_mr_input(struct sk_buff *skb)
1578{
1579 struct mfc6_cache *cache;
1580
1581 read_lock(&mrt_lock);
1582 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1583
1584 /*
1585 * No usable cache entry
1586 */
1587 if (cache == NULL) {
1588 int vif;
1589
1590 vif = ip6mr_find_vif(skb->dev);
1591 if (vif >= 0) {
1592 int err = ip6mr_cache_unresolved(vif, skb);
1593 read_unlock(&mrt_lock);
1594
1595 return err;
1596 }
1597 read_unlock(&mrt_lock);
1598 kfree_skb(skb);
1599 return -ENODEV;
1600 }
1601
1602 ip6_mr_forward(skb, cache);
1603
1604 read_unlock(&mrt_lock);
1605
1606 return 0;
1607}
1608
1609
/*
 *	Fill a netlink route message with the multicast route described by
 *	@c: the input interface (RTA_IIF) and an RTA_MULTIPATH nexthop list
 *	of all output mifs with a forwarding TTL.
 *
 *	Returns 1 on success, -EMSGSIZE if @skb has no room (the partial
 *	attributes are trimmed).  Runs under mrt_lock held for read (see
 *	ip6mr_get_route).
 */
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);	/* rollback point for trimming */
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	/* Reserve the RTA_MULTIPATH header; its type/len are filled in
	 * after the nexthops are appended.
	 */
	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		/* A ttl of 255 marks a mif this route does not forward on. */
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
1644
/*
 *	Resolve a multicast route for an RTM_GETROUTE request.
 *
 *	If a resolved cache entry exists, fill @skb via ip6mr_fill_mroute().
 *	Otherwise (unless @nowait) build a minimal pseudo IPv6 header skb
 *	carrying only the (src, dst) pair and queue it as an unresolved
 *	entry so the daemon can resolve it.
 */
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Fabricate a bare IPv6 header: only saddr/daddr matter to
		 * the resolver, everything else is zeroed / IPPROTO_NONE.
		 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1708