]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/ip6mr.c
netns: ip6mr: store netns in struct mfc6_cache
[net-next-2.6.git] / net / ipv6 / ip6mr.c
CommitLineData
7bc570c8
YH
1/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
7bc570c8
YH
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
7bc570c8
YH
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
7bc570c8 35#include <linux/init.h>
7bc570c8
YH
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
7bc570c8 39#include <net/raw.h>
7bc570c8
YH
40#include <linux/notifier.h>
41#include <linux/if_arp.h>
7bc570c8
YH
42#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
14fb64e1 48#include <linux/pim.h>
7bc570c8
YH
49#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
7bc570c8
YH
52/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
4e16880c 62#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
7bc570c8 63
14fb64e1
YH
64static int mroute_do_assert; /* Set in PIM assert */
65#ifdef CONFIG_IPV6_PIMSM_V2
66static int mroute_do_pim;
67#else
68#define mroute_do_pim 0
69#endif
70
6ac7eb08 71static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */
7bc570c8
YH
72
73static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
74static atomic_t cache_resolve_queue_len; /* Size of unresolved */
75
76/* Special spinlock for queue of unresolved entries */
77static DEFINE_SPINLOCK(mfc_unres_lock);
78
79/* We return to original Alan's scheme. Hash table of resolved
80 entries is changed only in process context and protected
81 with weak lock mrt_lock. Queue of unresolved entries is protected
82 with strong spinlock mfc_unres_lock.
83
84 In this case data path is free of exclusive locks at all.
85 */
86
87static struct kmem_cache *mrt_cachep __read_mostly;
88
89static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
6ac7eb08 90static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
7bc570c8
YH
91static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
92
14fb64e1
YH
93#ifdef CONFIG_IPV6_PIMSM_V2
94static struct inet6_protocol pim6_protocol;
95#endif
96
7bc570c8
YH
97static struct timer_list ipmr_expire_timer;
98
99
100#ifdef CONFIG_PROC_FS
101
/* Iterator state for /proc/net/ip6_mr_cache: 'cache' records which table
 * the current entry lives in (resolved hash array or &mfc_unres_queue) so
 * _stop() knows which lock to drop; 'ct' is the current hash bucket. */
struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};
106
107
108static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
109{
110 struct mfc6_cache *mfc;
111
112 it->cache = mfc6_cache_array;
113 read_lock(&mrt_lock);
114 for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
115 for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
116 if (pos-- == 0)
117 return mfc;
118 read_unlock(&mrt_lock);
119
120 it->cache = &mfc_unres_queue;
121 spin_lock_bh(&mfc_unres_lock);
122 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
123 if (pos-- == 0)
124 return mfc;
125 spin_unlock_bh(&mfc_unres_lock);
126
127 it->cache = NULL;
128 return NULL;
129}
130
131
132
133
/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

/* Iterator state for /proc/net/ip6_mr_vif: current index into vif6_table. */
struct ipmr_vif_iter {
	int ct;
};
141
142static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
143 loff_t pos)
144{
4e16880c
BT
145 for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
146 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8
YH
147 continue;
148 if (pos-- == 0)
4e16880c 149 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
150 }
151 return NULL;
152}
153
154static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
155 __acquires(mrt_lock)
156{
157 read_lock(&mrt_lock);
158 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
159 : SEQ_START_TOKEN);
160}
161
162static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
163{
164 struct ipmr_vif_iter *iter = seq->private;
165
166 ++*pos;
167 if (v == SEQ_START_TOKEN)
168 return ip6mr_vif_seq_idx(iter, 0);
169
4e16880c
BT
170 while (++iter->ct < init_net.ipv6.maxvif) {
171 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8 172 continue;
4e16880c 173 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
174 }
175 return NULL;
176}
177
178static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
179 __releases(mrt_lock)
180{
181 read_unlock(&mrt_lock);
182}
183
184static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
185{
186 if (v == SEQ_START_TOKEN) {
187 seq_puts(seq,
188 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
189 } else {
190 const struct mif_device *vif = v;
191 const char *name = vif->dev ? vif->dev->name : "none";
192
193 seq_printf(seq,
d430a227 194 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
4e16880c 195 vif - init_net.ipv6.vif6_table,
7bc570c8
YH
196 name, vif->bytes_in, vif->pkt_in,
197 vif->bytes_out, vif->pkt_out,
198 vif->flags);
199 }
200 return 0;
201}
202
203static struct seq_operations ip6mr_vif_seq_ops = {
204 .start = ip6mr_vif_seq_start,
205 .next = ip6mr_vif_seq_next,
206 .stop = ip6mr_vif_seq_stop,
207 .show = ip6mr_vif_seq_show,
208};
209
210static int ip6mr_vif_open(struct inode *inode, struct file *file)
211{
212 return seq_open_private(file, &ip6mr_vif_seq_ops,
213 sizeof(struct ipmr_vif_iter));
214}
215
216static struct file_operations ip6mr_vif_fops = {
217 .owner = THIS_MODULE,
218 .open = ip6mr_vif_open,
219 .read = seq_read,
220 .llseek = seq_lseek,
eedd726e 221 .release = seq_release_private,
7bc570c8
YH
222};
223
224static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
225{
226 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
227 : SEQ_START_TOKEN);
228}
229
230static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
231{
232 struct mfc6_cache *mfc = v;
233 struct ipmr_mfc_iter *it = seq->private;
234
235 ++*pos;
236
237 if (v == SEQ_START_TOKEN)
238 return ipmr_mfc_seq_idx(seq->private, 0);
239
240 if (mfc->next)
241 return mfc->next;
242
243 if (it->cache == &mfc_unres_queue)
244 goto end_of_list;
245
246 BUG_ON(it->cache != mfc6_cache_array);
247
248 while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
249 mfc = mfc6_cache_array[it->ct];
250 if (mfc)
251 return mfc;
252 }
253
254 /* exhausted cache_array, show unresolved */
255 read_unlock(&mrt_lock);
256 it->cache = &mfc_unres_queue;
257 it->ct = 0;
258
259 spin_lock_bh(&mfc_unres_lock);
260 mfc = mfc_unres_queue;
261 if (mfc)
262 return mfc;
263
264 end_of_list:
265 spin_unlock_bh(&mfc_unres_lock);
266 it->cache = NULL;
267
268 return NULL;
269}
270
271static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
272{
273 struct ipmr_mfc_iter *it = seq->private;
274
275 if (it->cache == &mfc_unres_queue)
276 spin_unlock_bh(&mfc_unres_lock);
277 else if (it->cache == mfc6_cache_array)
278 read_unlock(&mrt_lock);
279}
280
281static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
282{
283 int n;
284
285 if (v == SEQ_START_TOKEN) {
286 seq_puts(seq,
287 "Group "
288 "Origin "
289 "Iif Pkts Bytes Wrong Oifs\n");
290 } else {
291 const struct mfc6_cache *mfc = v;
292 const struct ipmr_mfc_iter *it = seq->private;
293
999890b2 294 seq_printf(seq, "%pI6 %pI6 %-3hd",
0c6ce78a 295 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
1ea472e2 296 mfc->mf6c_parent);
7bc570c8
YH
297
298 if (it->cache != &mfc_unres_queue) {
1ea472e2
BT
299 seq_printf(seq, " %8lu %8lu %8lu",
300 mfc->mfc_un.res.pkt,
301 mfc->mfc_un.res.bytes,
302 mfc->mfc_un.res.wrong_if);
7bc570c8
YH
303 for (n = mfc->mfc_un.res.minvif;
304 n < mfc->mfc_un.res.maxvif; n++) {
4e16880c 305 if (MIF_EXISTS(&init_net, n) &&
7bc570c8
YH
306 mfc->mfc_un.res.ttls[n] < 255)
307 seq_printf(seq,
308 " %2d:%-3d",
309 n, mfc->mfc_un.res.ttls[n]);
310 }
1ea472e2
BT
311 } else {
312 /* unresolved mfc_caches don't contain
313 * pkt, bytes and wrong_if values
314 */
315 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
7bc570c8
YH
316 }
317 seq_putc(seq, '\n');
318 }
319 return 0;
320}
321
322static struct seq_operations ipmr_mfc_seq_ops = {
323 .start = ipmr_mfc_seq_start,
324 .next = ipmr_mfc_seq_next,
325 .stop = ipmr_mfc_seq_stop,
326 .show = ipmr_mfc_seq_show,
327};
328
329static int ipmr_mfc_open(struct inode *inode, struct file *file)
330{
331 return seq_open_private(file, &ipmr_mfc_seq_ops,
332 sizeof(struct ipmr_mfc_iter));
333}
334
335static struct file_operations ip6mr_mfc_fops = {
336 .owner = THIS_MODULE,
337 .open = ipmr_mfc_open,
338 .read = seq_read,
339 .llseek = seq_lseek,
eedd726e 340 .release = seq_release_private,
7bc570c8
YH
341};
342#endif
343
14fb64e1
YH
344#ifdef CONFIG_IPV6_PIMSM_V2
345static int reg_vif_num = -1;
346
347static int pim6_rcv(struct sk_buff *skb)
348{
349 struct pimreghdr *pim;
350 struct ipv6hdr *encap;
351 struct net_device *reg_dev = NULL;
352
353 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
354 goto drop;
355
356 pim = (struct pimreghdr *)skb_transport_header(skb);
357 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
358 (pim->flags & PIM_NULL_REGISTER) ||
359 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
ec6b486f 360 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
14fb64e1
YH
361 goto drop;
362
363 /* check if the inner packet is destined to mcast group */
364 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
365 sizeof(*pim));
366
367 if (!ipv6_addr_is_multicast(&encap->daddr) ||
368 encap->payload_len == 0 ||
369 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
370 goto drop;
371
372 read_lock(&mrt_lock);
373 if (reg_vif_num >= 0)
4e16880c 374 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
14fb64e1
YH
375 if (reg_dev)
376 dev_hold(reg_dev);
377 read_unlock(&mrt_lock);
378
379 if (reg_dev == NULL)
380 goto drop;
381
382 skb->mac_header = skb->network_header;
383 skb_pull(skb, (u8 *)encap - skb->data);
384 skb_reset_network_header(skb);
385 skb->dev = reg_dev;
386 skb->protocol = htons(ETH_P_IP);
387 skb->ip_summed = 0;
388 skb->pkt_type = PACKET_HOST;
389 dst_release(skb->dst);
dc58c78c
PE
390 reg_dev->stats.rx_bytes += skb->len;
391 reg_dev->stats.rx_packets++;
14fb64e1
YH
392 skb->dst = NULL;
393 nf_reset(skb);
394 netif_rx(skb);
395 dev_put(reg_dev);
396 return 0;
397 drop:
398 kfree_skb(skb);
399 return 0;
400}
401
402static struct inet6_protocol pim6_protocol = {
403 .handler = pim6_rcv,
404};
405
406/* Service routines creating virtual interfaces: PIMREG */
407
408static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
409{
410 read_lock(&mrt_lock);
dc58c78c
PE
411 dev->stats.tx_bytes += skb->len;
412 dev->stats.tx_packets++;
14fb64e1
YH
413 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
414 read_unlock(&mrt_lock);
415 kfree_skb(skb);
416 return 0;
417}
418
007c3838
SH
419static const struct net_device_ops reg_vif_netdev_ops = {
420 .ndo_start_xmit = reg_vif_xmit,
421};
422
14fb64e1
YH
423static void reg_vif_setup(struct net_device *dev)
424{
425 dev->type = ARPHRD_PIMREG;
426 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
427 dev->flags = IFF_NOARP;
007c3838 428 dev->netdev_ops = &reg_vif_netdev_ops;
14fb64e1
YH
429 dev->destructor = free_netdev;
430}
431
432static struct net_device *ip6mr_reg_vif(void)
433{
434 struct net_device *dev;
14fb64e1 435
dc58c78c 436 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
14fb64e1
YH
437 if (dev == NULL)
438 return NULL;
439
440 if (register_netdevice(dev)) {
441 free_netdev(dev);
442 return NULL;
443 }
444 dev->iflink = 0;
445
14fb64e1
YH
446 if (dev_open(dev))
447 goto failure;
448
7af3db78 449 dev_hold(dev);
14fb64e1
YH
450 return dev;
451
452failure:
453 /* allow the register to be completed before unregistering. */
454 rtnl_unlock();
455 rtnl_lock();
456
457 unregister_netdevice(dev);
458 return NULL;
459}
460#endif
461
7bc570c8
YH
462/*
463 * Delete a VIF entry
464 */
465
466static int mif6_delete(int vifi)
467{
468 struct mif_device *v;
469 struct net_device *dev;
4e16880c 470 if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
7bc570c8
YH
471 return -EADDRNOTAVAIL;
472
4e16880c 473 v = &init_net.ipv6.vif6_table[vifi];
7bc570c8
YH
474
475 write_lock_bh(&mrt_lock);
476 dev = v->dev;
477 v->dev = NULL;
478
479 if (!dev) {
480 write_unlock_bh(&mrt_lock);
481 return -EADDRNOTAVAIL;
482 }
483
14fb64e1
YH
484#ifdef CONFIG_IPV6_PIMSM_V2
485 if (vifi == reg_vif_num)
486 reg_vif_num = -1;
487#endif
488
4e16880c 489 if (vifi + 1 == init_net.ipv6.maxvif) {
7bc570c8
YH
490 int tmp;
491 for (tmp = vifi - 1; tmp >= 0; tmp--) {
4e16880c 492 if (MIF_EXISTS(&init_net, tmp))
7bc570c8
YH
493 break;
494 }
4e16880c 495 init_net.ipv6.maxvif = tmp + 1;
7bc570c8
YH
496 }
497
498 write_unlock_bh(&mrt_lock);
499
500 dev_set_allmulti(dev, -1);
501
502 if (v->flags & MIFF_REGISTER)
503 unregister_netdevice(dev);
504
505 dev_put(dev);
506 return 0;
507}
508
58701ad4
BT
509static inline void ip6mr_cache_free(struct mfc6_cache *c)
510{
511 release_net(mfc6_net(c));
512 kmem_cache_free(mrt_cachep, c);
513}
514
7bc570c8
YH
515/* Destroy an unresolved cache entry, killing queued skbs
516 and reporting error to netlink readers.
517 */
518
519static void ip6mr_destroy_unres(struct mfc6_cache *c)
520{
521 struct sk_buff *skb;
522
523 atomic_dec(&cache_resolve_queue_len);
524
525 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
526 if (ipv6_hdr(skb)->version == 0) {
527 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
528 nlh->nlmsg_type = NLMSG_ERROR;
529 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
530 skb_trim(skb, nlh->nlmsg_len);
531 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
532 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
533 } else
534 kfree_skb(skb);
535 }
536
58701ad4 537 ip6mr_cache_free(c);
7bc570c8
YH
538}
539
540
541/* Single timer process for all the unresolved queue. */
542
543static void ipmr_do_expire_process(unsigned long dummy)
544{
545 unsigned long now = jiffies;
546 unsigned long expires = 10 * HZ;
547 struct mfc6_cache *c, **cp;
548
549 cp = &mfc_unres_queue;
550
551 while ((c = *cp) != NULL) {
552 if (time_after(c->mfc_un.unres.expires, now)) {
553 /* not yet... */
554 unsigned long interval = c->mfc_un.unres.expires - now;
555 if (interval < expires)
556 expires = interval;
557 cp = &c->next;
558 continue;
559 }
560
561 *cp = c->next;
562 ip6mr_destroy_unres(c);
563 }
564
565 if (atomic_read(&cache_resolve_queue_len))
566 mod_timer(&ipmr_expire_timer, jiffies + expires);
567}
568
569static void ipmr_expire_process(unsigned long dummy)
570{
571 if (!spin_trylock(&mfc_unres_lock)) {
572 mod_timer(&ipmr_expire_timer, jiffies + 1);
573 return;
574 }
575
576 if (atomic_read(&cache_resolve_queue_len))
577 ipmr_do_expire_process(dummy);
578
579 spin_unlock(&mfc_unres_lock);
580}
581
582/* Fill oifs list. It is called under write locked mrt_lock. */
583
584static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
585{
586 int vifi;
587
6ac7eb08 588 cache->mfc_un.res.minvif = MAXMIFS;
7bc570c8 589 cache->mfc_un.res.maxvif = 0;
6ac7eb08 590 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
7bc570c8 591
4e16880c
BT
592 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
593 if (MIF_EXISTS(&init_net, vifi) &&
594 ttls[vifi] && ttls[vifi] < 255) {
7bc570c8
YH
595 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
596 if (cache->mfc_un.res.minvif > vifi)
597 cache->mfc_un.res.minvif = vifi;
598 if (cache->mfc_un.res.maxvif <= vifi)
599 cache->mfc_un.res.maxvif = vifi + 1;
600 }
601 }
602}
603
604static int mif6_add(struct mif6ctl *vifc, int mrtsock)
605{
606 int vifi = vifc->mif6c_mifi;
4e16880c 607 struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
7bc570c8 608 struct net_device *dev;
5ae7b444 609 int err;
7bc570c8
YH
610
611 /* Is vif busy ? */
4e16880c 612 if (MIF_EXISTS(&init_net, vifi))
7bc570c8
YH
613 return -EADDRINUSE;
614
615 switch (vifc->mif6c_flags) {
14fb64e1
YH
616#ifdef CONFIG_IPV6_PIMSM_V2
617 case MIFF_REGISTER:
618 /*
619 * Special Purpose VIF in PIM
620 * All the packets will be sent to the daemon
621 */
622 if (reg_vif_num >= 0)
623 return -EADDRINUSE;
624 dev = ip6mr_reg_vif();
625 if (!dev)
626 return -ENOBUFS;
5ae7b444
WC
627 err = dev_set_allmulti(dev, 1);
628 if (err) {
629 unregister_netdevice(dev);
7af3db78 630 dev_put(dev);
5ae7b444
WC
631 return err;
632 }
14fb64e1
YH
633 break;
634#endif
7bc570c8
YH
635 case 0:
636 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
637 if (!dev)
638 return -EADDRNOTAVAIL;
5ae7b444 639 err = dev_set_allmulti(dev, 1);
7af3db78
WC
640 if (err) {
641 dev_put(dev);
5ae7b444 642 return err;
7af3db78 643 }
7bc570c8
YH
644 break;
645 default:
646 return -EINVAL;
647 }
648
7bc570c8
YH
649 /*
650 * Fill in the VIF structures
651 */
652 v->rate_limit = vifc->vifc_rate_limit;
653 v->flags = vifc->mif6c_flags;
654 if (!mrtsock)
655 v->flags |= VIFF_STATIC;
656 v->threshold = vifc->vifc_threshold;
657 v->bytes_in = 0;
658 v->bytes_out = 0;
659 v->pkt_in = 0;
660 v->pkt_out = 0;
661 v->link = dev->ifindex;
662 if (v->flags & MIFF_REGISTER)
663 v->link = dev->iflink;
664
665 /* And finish update writing critical data */
666 write_lock_bh(&mrt_lock);
7bc570c8 667 v->dev = dev;
14fb64e1
YH
668#ifdef CONFIG_IPV6_PIMSM_V2
669 if (v->flags & MIFF_REGISTER)
670 reg_vif_num = vifi;
671#endif
4e16880c
BT
672 if (vifi + 1 > init_net.ipv6.maxvif)
673 init_net.ipv6.maxvif = vifi + 1;
7bc570c8
YH
674 write_unlock_bh(&mrt_lock);
675 return 0;
676}
677
678static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
679{
680 int line = MFC6_HASH(mcastgrp, origin);
681 struct mfc6_cache *c;
682
683 for (c = mfc6_cache_array[line]; c; c = c->next) {
684 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
685 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
686 break;
687 }
688 return c;
689}
690
691/*
692 * Allocate a multicast cache entry
693 */
58701ad4 694static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
7bc570c8 695{
36cbac59 696 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
7bc570c8
YH
697 if (c == NULL)
698 return NULL;
6ac7eb08 699 c->mfc_un.res.minvif = MAXMIFS;
58701ad4 700 mfc6_net_set(c, net);
7bc570c8
YH
701 return c;
702}
703
58701ad4 704static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
7bc570c8 705{
36cbac59 706 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
7bc570c8
YH
707 if (c == NULL)
708 return NULL;
7bc570c8
YH
709 skb_queue_head_init(&c->mfc_un.unres.unresolved);
710 c->mfc_un.unres.expires = jiffies + 10 * HZ;
58701ad4 711 mfc6_net_set(c, net);
7bc570c8
YH
712 return c;
713}
714
715/*
716 * A cache entry has gone into a resolved state from queued
717 */
718
719static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
720{
721 struct sk_buff *skb;
722
723 /*
724 * Play the pending entries through our router
725 */
726
727 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
728 if (ipv6_hdr(skb)->version == 0) {
729 int err;
730 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
731
732 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
549e028d 733 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
7bc570c8
YH
734 } else {
735 nlh->nlmsg_type = NLMSG_ERROR;
736 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
737 skb_trim(skb, nlh->nlmsg_len);
738 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
739 }
740 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
741 } else
742 ip6_mr_forward(skb, c);
743 }
744}
745
746/*
747 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
748 * expects the following bizarre scheme.
749 *
750 * Called under mrt_lock.
751 */
752
6ac7eb08 753static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
7bc570c8
YH
754{
755 struct sk_buff *skb;
756 struct mrt6msg *msg;
757 int ret;
758
14fb64e1
YH
759#ifdef CONFIG_IPV6_PIMSM_V2
760 if (assert == MRT6MSG_WHOLEPKT)
761 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
762 +sizeof(*msg));
763 else
764#endif
765 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
7bc570c8
YH
766
767 if (!skb)
768 return -ENOBUFS;
769
770 /* I suppose that internal messages
771 * do not require checksums */
772
773 skb->ip_summed = CHECKSUM_UNNECESSARY;
774
14fb64e1
YH
775#ifdef CONFIG_IPV6_PIMSM_V2
776 if (assert == MRT6MSG_WHOLEPKT) {
777 /* Ugly, but we have no choice with this interface.
778 Duplicate old header, fix length etc.
779 And all this only to mangle msg->im6_msgtype and
780 to set msg->im6_mbz to "mbz" :-)
781 */
782 skb_push(skb, -skb_network_offset(pkt));
783
784 skb_push(skb, sizeof(*msg));
785 skb_reset_transport_header(skb);
786 msg = (struct mrt6msg *)skb_transport_header(skb);
787 msg->im6_mbz = 0;
788 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
789 msg->im6_mif = reg_vif_num;
790 msg->im6_pad = 0;
791 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
792 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
793
794 skb->ip_summed = CHECKSUM_UNNECESSARY;
795 } else
796#endif
797 {
7bc570c8
YH
798 /*
799 * Copy the IP header
800 */
801
802 skb_put(skb, sizeof(struct ipv6hdr));
803 skb_reset_network_header(skb);
804 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
805
806 /*
807 * Add our header
808 */
809 skb_put(skb, sizeof(*msg));
810 skb_reset_transport_header(skb);
811 msg = (struct mrt6msg *)skb_transport_header(skb);
812
813 msg->im6_mbz = 0;
814 msg->im6_msgtype = assert;
6ac7eb08 815 msg->im6_mif = mifi;
7bc570c8
YH
816 msg->im6_pad = 0;
817 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
818 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
819
820 skb->dst = dst_clone(pkt->dst);
821 skb->ip_summed = CHECKSUM_UNNECESSARY;
822
823 skb_pull(skb, sizeof(struct ipv6hdr));
14fb64e1 824 }
7bc570c8 825
bd91b8bf 826 if (init_net.ipv6.mroute6_sk == NULL) {
7bc570c8
YH
827 kfree_skb(skb);
828 return -EINVAL;
829 }
830
831 /*
832 * Deliver to user space multicast routing algorithms
833 */
bd91b8bf
BT
834 ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
835 if (ret < 0) {
7bc570c8
YH
836 if (net_ratelimit())
837 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
838 kfree_skb(skb);
839 }
840
841 return ret;
842}
843
844/*
845 * Queue a packet for resolution. It gets locked cache entry!
846 */
847
848static int
6ac7eb08 849ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
7bc570c8
YH
850{
851 int err;
852 struct mfc6_cache *c;
853
854 spin_lock_bh(&mfc_unres_lock);
855 for (c = mfc_unres_queue; c; c = c->next) {
856 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
857 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
858 break;
859 }
860
861 if (c == NULL) {
862 /*
863 * Create a new entry if allowable
864 */
865
866 if (atomic_read(&cache_resolve_queue_len) >= 10 ||
58701ad4 867 (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
7bc570c8
YH
868 spin_unlock_bh(&mfc_unres_lock);
869
870 kfree_skb(skb);
871 return -ENOBUFS;
872 }
873
874 /*
875 * Fill in the new cache entry
876 */
877 c->mf6c_parent = -1;
878 c->mf6c_origin = ipv6_hdr(skb)->saddr;
879 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
880
881 /*
882 * Reflect first query at pim6sd
883 */
6ac7eb08 884 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
7bc570c8
YH
885 /* If the report failed throw the cache entry
886 out - Brad Parker
887 */
888 spin_unlock_bh(&mfc_unres_lock);
889
58701ad4 890 ip6mr_cache_free(c);
7bc570c8
YH
891 kfree_skb(skb);
892 return err;
893 }
894
895 atomic_inc(&cache_resolve_queue_len);
896 c->next = mfc_unres_queue;
897 mfc_unres_queue = c;
898
899 ipmr_do_expire_process(1);
900 }
901
902 /*
903 * See if we can append the packet
904 */
905 if (c->mfc_un.unres.unresolved.qlen > 3) {
906 kfree_skb(skb);
907 err = -ENOBUFS;
908 } else {
909 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
910 err = 0;
911 }
912
913 spin_unlock_bh(&mfc_unres_lock);
914 return err;
915}
916
917/*
918 * MFC6 cache manipulation by user space
919 */
920
921static int ip6mr_mfc_delete(struct mf6cctl *mfc)
922{
923 int line;
924 struct mfc6_cache *c, **cp;
925
926 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
927
928 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
929 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
930 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
931 write_lock_bh(&mrt_lock);
932 *cp = c->next;
933 write_unlock_bh(&mrt_lock);
934
58701ad4 935 ip6mr_cache_free(c);
7bc570c8
YH
936 return 0;
937 }
938 }
939 return -ENOENT;
940}
941
942static int ip6mr_device_event(struct notifier_block *this,
943 unsigned long event, void *ptr)
944{
945 struct net_device *dev = ptr;
946 struct mif_device *v;
947 int ct;
948
721499e8 949 if (!net_eq(dev_net(dev), &init_net))
7bc570c8
YH
950 return NOTIFY_DONE;
951
952 if (event != NETDEV_UNREGISTER)
953 return NOTIFY_DONE;
954
4e16880c
BT
955 v = &init_net.ipv6.vif6_table[0];
956 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
7bc570c8
YH
957 if (v->dev == dev)
958 mif6_delete(ct);
959 }
960 return NOTIFY_DONE;
961}
962
963static struct notifier_block ip6_mr_notifier = {
964 .notifier_call = ip6mr_device_event
965};
966
967/*
968 * Setup for IP multicast routing
969 */
970
4e16880c
BT
971static int __net_init ip6mr_net_init(struct net *net)
972{
973 int err = 0;
974
975 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
976 GFP_KERNEL);
977 if (!net->ipv6.vif6_table) {
978 err = -ENOMEM;
979 goto fail;
980 }
981fail:
982 return err;
983}
984
985static void __net_exit ip6mr_net_exit(struct net *net)
986{
987 kfree(net->ipv6.vif6_table);
988}
989
990static struct pernet_operations ip6mr_net_ops = {
991 .init = ip6mr_net_init,
992 .exit = ip6mr_net_exit,
993};
994
623d1a1a 995int __init ip6_mr_init(void)
7bc570c8 996{
623d1a1a
WC
997 int err;
998
7bc570c8
YH
999 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1000 sizeof(struct mfc6_cache),
1001 0, SLAB_HWCACHE_ALIGN,
1002 NULL);
1003 if (!mrt_cachep)
623d1a1a 1004 return -ENOMEM;
7bc570c8 1005
4e16880c
BT
1006 err = register_pernet_subsys(&ip6mr_net_ops);
1007 if (err)
1008 goto reg_pernet_fail;
1009
7bc570c8 1010 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
623d1a1a
WC
1011 err = register_netdevice_notifier(&ip6_mr_notifier);
1012 if (err)
1013 goto reg_notif_fail;
1014#ifdef CONFIG_PROC_FS
1015 err = -ENOMEM;
1016 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1017 goto proc_vif_fail;
1018 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
1019 0, &ip6mr_mfc_fops))
1020 goto proc_cache_fail;
1021#endif
1022 return 0;
7bc570c8 1023#ifdef CONFIG_PROC_FS
623d1a1a
WC
1024proc_cache_fail:
1025 proc_net_remove(&init_net, "ip6_mr_vif");
87b30a65
BT
1026proc_vif_fail:
1027 unregister_netdevice_notifier(&ip6_mr_notifier);
7bc570c8 1028#endif
87b30a65
BT
1029reg_notif_fail:
1030 del_timer(&ipmr_expire_timer);
4e16880c
BT
1031 unregister_pernet_subsys(&ip6mr_net_ops);
1032reg_pernet_fail:
87b30a65 1033 kmem_cache_destroy(mrt_cachep);
623d1a1a 1034 return err;
7bc570c8
YH
1035}
1036
623d1a1a
WC
1037void ip6_mr_cleanup(void)
1038{
1039#ifdef CONFIG_PROC_FS
1040 proc_net_remove(&init_net, "ip6_mr_cache");
1041 proc_net_remove(&init_net, "ip6_mr_vif");
1042#endif
1043 unregister_netdevice_notifier(&ip6_mr_notifier);
1044 del_timer(&ipmr_expire_timer);
4e16880c 1045 unregister_pernet_subsys(&ip6mr_net_ops);
623d1a1a
WC
1046 kmem_cache_destroy(mrt_cachep);
1047}
7bc570c8
YH
1048
1049static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1050{
1051 int line;
1052 struct mfc6_cache *uc, *c, **cp;
6ac7eb08 1053 unsigned char ttls[MAXMIFS];
7bc570c8
YH
1054 int i;
1055
6ac7eb08
RR
1056 memset(ttls, 255, MAXMIFS);
1057 for (i = 0; i < MAXMIFS; i++) {
7bc570c8
YH
1058 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1059 ttls[i] = 1;
1060
1061 }
1062
1063 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1064
1065 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
1066 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1067 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1068 break;
1069 }
1070
1071 if (c != NULL) {
1072 write_lock_bh(&mrt_lock);
1073 c->mf6c_parent = mfc->mf6cc_parent;
1074 ip6mr_update_thresholds(c, ttls);
1075 if (!mrtsock)
1076 c->mfc_flags |= MFC_STATIC;
1077 write_unlock_bh(&mrt_lock);
1078 return 0;
1079 }
1080
1081 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1082 return -EINVAL;
1083
58701ad4 1084 c = ip6mr_cache_alloc(&init_net);
7bc570c8
YH
1085 if (c == NULL)
1086 return -ENOMEM;
1087
1088 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1089 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1090 c->mf6c_parent = mfc->mf6cc_parent;
1091 ip6mr_update_thresholds(c, ttls);
1092 if (!mrtsock)
1093 c->mfc_flags |= MFC_STATIC;
1094
1095 write_lock_bh(&mrt_lock);
1096 c->next = mfc6_cache_array[line];
1097 mfc6_cache_array[line] = c;
1098 write_unlock_bh(&mrt_lock);
1099
1100 /*
1101 * Check to see if we resolved a queued list. If so we
1102 * need to send on the frames and tidy up.
1103 */
1104 spin_lock_bh(&mfc_unres_lock);
1105 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1106 cp = &uc->next) {
1107 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1108 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1109 *cp = uc->next;
1110 if (atomic_dec_and_test(&cache_resolve_queue_len))
1111 del_timer(&ipmr_expire_timer);
1112 break;
1113 }
1114 }
1115 spin_unlock_bh(&mfc_unres_lock);
1116
1117 if (uc) {
1118 ip6mr_cache_resolve(uc, c);
58701ad4 1119 ip6mr_cache_free(uc);
7bc570c8
YH
1120 }
1121 return 0;
1122}
1123
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

/*
 * mroute_clean_tables - tear down multicast routing state when the
 * mroute6 control socket is closed (or MRT6_DONE is issued).
 *
 * Deletes every non-static mif, frees every non-static resolved cache
 * entry and destroys all entries still waiting for resolution.
 * Called under RTNL (see ip6mr_sk_done); @sk is the closing control
 * socket and is not otherwise used here.
 */
static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv6.maxvif; i++) {
		/* mifs added with VIFF_STATIC survive the control socket */
		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			/* MFC_STATIC entries are kept; skip over them */
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* unlink under mrt_lock so readers never follow a
			 * pointer into a freed entry */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	/* Flush the unresolved queue, if anything is still pending */
	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			/* NOTE(review): lock is dropped around
			 * ip6mr_destroy_unres — presumably it frees queued
			 * skbs / sends reports; confirm against its body */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1176
1177static int ip6mr_sk_init(struct sock *sk)
1178{
1179 int err = 0;
1180
1181 rtnl_lock();
1182 write_lock_bh(&mrt_lock);
bd91b8bf
BT
1183 if (likely(init_net.ipv6.mroute6_sk == NULL))
1184 init_net.ipv6.mroute6_sk = sk;
7bc570c8
YH
1185 else
1186 err = -EADDRINUSE;
1187 write_unlock_bh(&mrt_lock);
1188
1189 rtnl_unlock();
1190
1191 return err;
1192}
1193
1194int ip6mr_sk_done(struct sock *sk)
1195{
1196 int err = 0;
1197
1198 rtnl_lock();
bd91b8bf 1199 if (sk == init_net.ipv6.mroute6_sk) {
7bc570c8 1200 write_lock_bh(&mrt_lock);
bd91b8bf 1201 init_net.ipv6.mroute6_sk = NULL;
7bc570c8
YH
1202 write_unlock_bh(&mrt_lock);
1203
1204 mroute_clean_tables(sk);
1205 } else
1206 err = -EACCES;
1207 rtnl_unlock();
1208
1209 return err;
1210}
1211
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

/*
 * ip6_mroute_setsockopt - handle MRT6_* socket options on the control
 * socket.  Returns 0 on success or a negative errno.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	/* Everything except MRT6_INIT requires either ownership of the
	 * control socket or CAP_NET_ADMIN */
	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		/* only a raw ICMPv6 socket may become the control socket */
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		/* second arg: whether the mif was added by the mrouter
		 * itself (non-static) or an admin (static) */
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			/* enabling PIM also enables assert mode, and
			 * (un)registers the PIM protocol handler */
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1330
1331/*
1332 * Getsock opt support for the multicast routing system.
1333 */
1334
1335int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1336 int __user *optlen)
1337{
1338 int olr;
1339 int val;
1340
1341 switch (optname) {
1342 case MRT6_VERSION:
1343 val = 0x0305;
1344 break;
14fb64e1
YH
1345#ifdef CONFIG_IPV6_PIMSM_V2
1346 case MRT6_PIM:
1347 val = mroute_do_pim;
1348 break;
1349#endif
1350 case MRT6_ASSERT:
1351 val = mroute_do_assert;
1352 break;
7bc570c8
YH
1353 default:
1354 return -ENOPROTOOPT;
1355 }
1356
1357 if (get_user(olr, optlen))
1358 return -EFAULT;
1359
1360 olr = min_t(int, olr, sizeof(int));
1361 if (olr < 0)
1362 return -EINVAL;
1363
1364 if (put_user(olr, optlen))
1365 return -EFAULT;
1366 if (copy_to_user(optval, &val, olr))
1367 return -EFAULT;
1368 return 0;
1369}
1370
/*
 *	The IP multicast ioctl support routines.
 */

/*
 * ip6mr_ioctl - statistics queries on the control socket.
 * SIOCGETMIFCNT_IN6 returns per-mif packet/byte counters;
 * SIOCGETSGCNT_IN6 returns per-(S,G) forwarding counters.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= init_net.ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(&init_net, vr.mifi)) {
			/* snapshot counters under mrt_lock ... */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			/* ... and copy to userspace only after dropping the
			 * lock, since copy_to_user may fault/sleep */
			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			/* same pattern: snapshot, unlock, then copy out */
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1425
1426
1427static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1428{
483a47d2
DL
1429 IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1430 IPSTATS_MIB_OUTFORWDATAGRAMS);
7bc570c8
YH
1431 return dst_output(skb);
1432}
1433
/*
 *	Processing handlers for ip6mr_forward
 */

/*
 * ip6mr_forward2 - transmit one copy of @skb out through mif @vifi.
 * Consumes @skb on every path (forwarded, reported, or dropped) and
 * always returns 0.
 */
static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* PIM register mif: hand the whole packet to the daemon instead
	 * of transmitting it */
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* route the copy out of the mif's link, keyed on destination */
	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	/* replace the input route with the freshly looked-up output route */
	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* re-read the header: skb_cow may have reallocated the data */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1510
1511static int ip6mr_find_vif(struct net_device *dev)
1512{
1513 int ct;
4e16880c
BT
1514 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1515 if (init_net.ipv6.vif6_table[ct].dev == dev)
7bc570c8
YH
1516 break;
1517 }
1518 return ct;
1519}
1520
/*
 * ip6_mr_forward - forward @skb according to cache entry @cache.
 * Duplicates the packet to every output mif whose TTL threshold the
 * packet's hop limit exceeds.  Consumes @skb; always returns 0.
 * Called with mrt_lock held for read (see ip6_mr_input).
 */
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    /* rate-limit asserts to one per MFC_ASSERT_THRESH */
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	init_net.ipv6.vif6_table[vif].pkt_in++;
	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			/* clone for every target found so far; the original
			 * skb is kept for the last (lowest-index) target */
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		/* last target consumes the original skb */
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
1579
1580
1581/*
1582 * Multicast packets for forwarding arrive here
1583 */
1584
1585int ip6_mr_input(struct sk_buff *skb)
1586{
1587 struct mfc6_cache *cache;
1588
1589 read_lock(&mrt_lock);
1590 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1591
1592 /*
1593 * No usable cache entry
1594 */
1595 if (cache == NULL) {
1596 int vif;
1597
1598 vif = ip6mr_find_vif(skb->dev);
1599 if (vif >= 0) {
1600 int err = ip6mr_cache_unresolved(vif, skb);
1601 read_unlock(&mrt_lock);
1602
1603 return err;
1604 }
1605 read_unlock(&mrt_lock);
1606 kfree_skb(skb);
1607 return -ENODEV;
1608 }
1609
1610 ip6_mr_forward(skb, cache);
1611
1612 read_unlock(&mrt_lock);
1613
1614 return 0;
1615}
1616
1617
/*
 * ip6mr_fill_mroute - append multicast route attributes for cache entry
 * @c to an rtnetlink message: RTA_IIF (input interface) plus an
 * RTA_MULTIPATH nexthop list of all output mifs with a TTL below 255.
 * Returns 1 on success, -EMSGSIZE if @skb ran out of tailroom (the
 * message is trimmed back to its state on entry).
 */
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
	/* remember the tail so we can roll back on overflow */
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	/* reserve the RTA_MULTIPATH header; length is patched below once
	 * all nexthops have been appended */
	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		/* ttl 255 marks a mif the entry does not forward to */
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	/* undo everything appended by this function (RTA_PUT jumps here
	 * too on overflow) */
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
1652
/*
 * ip6mr_get_route - rtnetlink route-get support for multicast routes.
 * Fills @skb with the multicast forwarding info for the route attached
 * to skb->dst.  If no cache entry exists yet, a minimal pseudo packet
 * is queued for resolution instead (unless @nowait, which returns
 * -EAGAIN).  Returns the ip6mr_fill_mroute result, or a negative errno.
 */
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* build a minimal pseudo IPv6 header carrying only the
		 * (src, dst) pair; NOTE(review): version is set to 0,
		 * apparently to mark the header as synthetic for the
		 * resolver path — confirm against ip6mr_cache_unresolved */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1716