]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/ip6mr.c
netns: ip6mr: declare reg_vif_num per-namespace
[net-next-2.6.git] / net / ipv6 / ip6mr.c
CommitLineData
7bc570c8
YH
1/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
7bc570c8
YH
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
7bc570c8
YH
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
7bc570c8 35#include <linux/init.h>
7bc570c8
YH
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
7bc570c8 39#include <net/raw.h>
7bc570c8
YH
40#include <linux/notifier.h>
41#include <linux/if_arp.h>
7bc570c8
YH
42#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
14fb64e1 48#include <linux/pim.h>
7bc570c8
YH
49#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
7bc570c8
YH
52/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
4e16880c 62#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
7bc570c8 63
7bc570c8 64static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
7bc570c8
YH
65
66/* Special spinlock for queue of unresolved entries */
67static DEFINE_SPINLOCK(mfc_unres_lock);
68
69/* We return to original Alan's scheme. Hash table of resolved
70 entries is changed only in process context and protected
71 with weak lock mrt_lock. Queue of unresolved entries is protected
72 with strong spinlock mfc_unres_lock.
73
74 In this case data path is free of exclusive locks at all.
75 */
76
77static struct kmem_cache *mrt_cachep __read_mostly;
78
79static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
6ac7eb08 80static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
7bc570c8
YH
81static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
82
14fb64e1
YH
83#ifdef CONFIG_IPV6_PIMSM_V2
84static struct inet6_protocol pim6_protocol;
85#endif
86
7bc570c8
YH
87static struct timer_list ipmr_expire_timer;
88
89
90#ifdef CONFIG_PROC_FS
91
92struct ipmr_mfc_iter {
93 struct mfc6_cache **cache;
94 int ct;
95};
96
97
98static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
99{
100 struct mfc6_cache *mfc;
101
4a6258a0 102 it->cache = init_net.ipv6.mfc6_cache_array;
7bc570c8 103 read_lock(&mrt_lock);
4a6258a0
BT
104 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
105 for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
106 mfc; mfc = mfc->next)
7bc570c8
YH
107 if (pos-- == 0)
108 return mfc;
109 read_unlock(&mrt_lock);
110
111 it->cache = &mfc_unres_queue;
112 spin_lock_bh(&mfc_unres_lock);
113 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
114 if (pos-- == 0)
115 return mfc;
116 spin_unlock_bh(&mfc_unres_lock);
117
118 it->cache = NULL;
119 return NULL;
120}
121
122
123
124
125/*
126 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
127 */
128
129struct ipmr_vif_iter {
130 int ct;
131};
132
133static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
134 loff_t pos)
135{
4e16880c
BT
136 for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
137 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8
YH
138 continue;
139 if (pos-- == 0)
4e16880c 140 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
141 }
142 return NULL;
143}
144
145static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
146 __acquires(mrt_lock)
147{
148 read_lock(&mrt_lock);
149 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
150 : SEQ_START_TOKEN);
151}
152
153static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
154{
155 struct ipmr_vif_iter *iter = seq->private;
156
157 ++*pos;
158 if (v == SEQ_START_TOKEN)
159 return ip6mr_vif_seq_idx(iter, 0);
160
4e16880c
BT
161 while (++iter->ct < init_net.ipv6.maxvif) {
162 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8 163 continue;
4e16880c 164 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
165 }
166 return NULL;
167}
168
169static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
170 __releases(mrt_lock)
171{
172 read_unlock(&mrt_lock);
173}
174
175static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
176{
177 if (v == SEQ_START_TOKEN) {
178 seq_puts(seq,
179 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
180 } else {
181 const struct mif_device *vif = v;
182 const char *name = vif->dev ? vif->dev->name : "none";
183
184 seq_printf(seq,
d430a227 185 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
4e16880c 186 vif - init_net.ipv6.vif6_table,
7bc570c8
YH
187 name, vif->bytes_in, vif->pkt_in,
188 vif->bytes_out, vif->pkt_out,
189 vif->flags);
190 }
191 return 0;
192}
193
194static struct seq_operations ip6mr_vif_seq_ops = {
195 .start = ip6mr_vif_seq_start,
196 .next = ip6mr_vif_seq_next,
197 .stop = ip6mr_vif_seq_stop,
198 .show = ip6mr_vif_seq_show,
199};
200
201static int ip6mr_vif_open(struct inode *inode, struct file *file)
202{
203 return seq_open_private(file, &ip6mr_vif_seq_ops,
204 sizeof(struct ipmr_vif_iter));
205}
206
207static struct file_operations ip6mr_vif_fops = {
208 .owner = THIS_MODULE,
209 .open = ip6mr_vif_open,
210 .read = seq_read,
211 .llseek = seq_lseek,
eedd726e 212 .release = seq_release_private,
7bc570c8
YH
213};
214
215static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
216{
217 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
218 : SEQ_START_TOKEN);
219}
220
221static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
222{
223 struct mfc6_cache *mfc = v;
224 struct ipmr_mfc_iter *it = seq->private;
225
226 ++*pos;
227
228 if (v == SEQ_START_TOKEN)
229 return ipmr_mfc_seq_idx(seq->private, 0);
230
231 if (mfc->next)
232 return mfc->next;
233
234 if (it->cache == &mfc_unres_queue)
235 goto end_of_list;
236
4a6258a0 237 BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);
7bc570c8 238
4a6258a0
BT
239 while (++it->ct < MFC6_LINES) {
240 mfc = init_net.ipv6.mfc6_cache_array[it->ct];
7bc570c8
YH
241 if (mfc)
242 return mfc;
243 }
244
245 /* exhausted cache_array, show unresolved */
246 read_unlock(&mrt_lock);
247 it->cache = &mfc_unres_queue;
248 it->ct = 0;
249
250 spin_lock_bh(&mfc_unres_lock);
251 mfc = mfc_unres_queue;
252 if (mfc)
253 return mfc;
254
255 end_of_list:
256 spin_unlock_bh(&mfc_unres_lock);
257 it->cache = NULL;
258
259 return NULL;
260}
261
262static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
263{
264 struct ipmr_mfc_iter *it = seq->private;
265
266 if (it->cache == &mfc_unres_queue)
267 spin_unlock_bh(&mfc_unres_lock);
4a6258a0 268 else if (it->cache == init_net.ipv6.mfc6_cache_array)
7bc570c8
YH
269 read_unlock(&mrt_lock);
270}
271
272static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
273{
274 int n;
275
276 if (v == SEQ_START_TOKEN) {
277 seq_puts(seq,
278 "Group "
279 "Origin "
280 "Iif Pkts Bytes Wrong Oifs\n");
281 } else {
282 const struct mfc6_cache *mfc = v;
283 const struct ipmr_mfc_iter *it = seq->private;
284
999890b2 285 seq_printf(seq, "%pI6 %pI6 %-3hd",
0c6ce78a 286 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
1ea472e2 287 mfc->mf6c_parent);
7bc570c8
YH
288
289 if (it->cache != &mfc_unres_queue) {
1ea472e2
BT
290 seq_printf(seq, " %8lu %8lu %8lu",
291 mfc->mfc_un.res.pkt,
292 mfc->mfc_un.res.bytes,
293 mfc->mfc_un.res.wrong_if);
7bc570c8
YH
294 for (n = mfc->mfc_un.res.minvif;
295 n < mfc->mfc_un.res.maxvif; n++) {
4e16880c 296 if (MIF_EXISTS(&init_net, n) &&
7bc570c8
YH
297 mfc->mfc_un.res.ttls[n] < 255)
298 seq_printf(seq,
299 " %2d:%-3d",
300 n, mfc->mfc_un.res.ttls[n]);
301 }
1ea472e2
BT
302 } else {
303 /* unresolved mfc_caches don't contain
304 * pkt, bytes and wrong_if values
305 */
306 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
7bc570c8
YH
307 }
308 seq_putc(seq, '\n');
309 }
310 return 0;
311}
312
313static struct seq_operations ipmr_mfc_seq_ops = {
314 .start = ipmr_mfc_seq_start,
315 .next = ipmr_mfc_seq_next,
316 .stop = ipmr_mfc_seq_stop,
317 .show = ipmr_mfc_seq_show,
318};
319
320static int ipmr_mfc_open(struct inode *inode, struct file *file)
321{
322 return seq_open_private(file, &ipmr_mfc_seq_ops,
323 sizeof(struct ipmr_mfc_iter));
324}
325
326static struct file_operations ip6mr_mfc_fops = {
327 .owner = THIS_MODULE,
328 .open = ipmr_mfc_open,
329 .read = seq_read,
330 .llseek = seq_lseek,
eedd726e 331 .release = seq_release_private,
7bc570c8
YH
332};
333#endif
334
14fb64e1 335#ifdef CONFIG_IPV6_PIMSM_V2
14fb64e1
YH
336
337static int pim6_rcv(struct sk_buff *skb)
338{
339 struct pimreghdr *pim;
340 struct ipv6hdr *encap;
341 struct net_device *reg_dev = NULL;
950d5704 342 int reg_vif_num = init_net.ipv6.mroute_reg_vif_num;
14fb64e1
YH
343
344 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
345 goto drop;
346
347 pim = (struct pimreghdr *)skb_transport_header(skb);
348 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
349 (pim->flags & PIM_NULL_REGISTER) ||
350 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
ec6b486f 351 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
14fb64e1
YH
352 goto drop;
353
354 /* check if the inner packet is destined to mcast group */
355 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
356 sizeof(*pim));
357
358 if (!ipv6_addr_is_multicast(&encap->daddr) ||
359 encap->payload_len == 0 ||
360 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
361 goto drop;
362
363 read_lock(&mrt_lock);
364 if (reg_vif_num >= 0)
4e16880c 365 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
14fb64e1
YH
366 if (reg_dev)
367 dev_hold(reg_dev);
368 read_unlock(&mrt_lock);
369
370 if (reg_dev == NULL)
371 goto drop;
372
373 skb->mac_header = skb->network_header;
374 skb_pull(skb, (u8 *)encap - skb->data);
375 skb_reset_network_header(skb);
376 skb->dev = reg_dev;
377 skb->protocol = htons(ETH_P_IP);
378 skb->ip_summed = 0;
379 skb->pkt_type = PACKET_HOST;
380 dst_release(skb->dst);
dc58c78c
PE
381 reg_dev->stats.rx_bytes += skb->len;
382 reg_dev->stats.rx_packets++;
14fb64e1
YH
383 skb->dst = NULL;
384 nf_reset(skb);
385 netif_rx(skb);
386 dev_put(reg_dev);
387 return 0;
388 drop:
389 kfree_skb(skb);
390 return 0;
391}
392
393static struct inet6_protocol pim6_protocol = {
394 .handler = pim6_rcv,
395};
396
397/* Service routines creating virtual interfaces: PIMREG */
398
399static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
400{
401 read_lock(&mrt_lock);
dc58c78c
PE
402 dev->stats.tx_bytes += skb->len;
403 dev->stats.tx_packets++;
950d5704 404 ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
14fb64e1
YH
405 read_unlock(&mrt_lock);
406 kfree_skb(skb);
407 return 0;
408}
409
007c3838
SH
410static const struct net_device_ops reg_vif_netdev_ops = {
411 .ndo_start_xmit = reg_vif_xmit,
412};
413
14fb64e1
YH
414static void reg_vif_setup(struct net_device *dev)
415{
416 dev->type = ARPHRD_PIMREG;
417 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
418 dev->flags = IFF_NOARP;
007c3838 419 dev->netdev_ops = &reg_vif_netdev_ops;
14fb64e1
YH
420 dev->destructor = free_netdev;
421}
422
423static struct net_device *ip6mr_reg_vif(void)
424{
425 struct net_device *dev;
14fb64e1 426
dc58c78c 427 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
14fb64e1
YH
428 if (dev == NULL)
429 return NULL;
430
431 if (register_netdevice(dev)) {
432 free_netdev(dev);
433 return NULL;
434 }
435 dev->iflink = 0;
436
14fb64e1
YH
437 if (dev_open(dev))
438 goto failure;
439
7af3db78 440 dev_hold(dev);
14fb64e1
YH
441 return dev;
442
443failure:
444 /* allow the register to be completed before unregistering. */
445 rtnl_unlock();
446 rtnl_lock();
447
448 unregister_netdevice(dev);
449 return NULL;
450}
451#endif
452
7bc570c8
YH
453/*
454 * Delete a VIF entry
455 */
456
457static int mif6_delete(int vifi)
458{
459 struct mif_device *v;
460 struct net_device *dev;
4e16880c 461 if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
7bc570c8
YH
462 return -EADDRNOTAVAIL;
463
4e16880c 464 v = &init_net.ipv6.vif6_table[vifi];
7bc570c8
YH
465
466 write_lock_bh(&mrt_lock);
467 dev = v->dev;
468 v->dev = NULL;
469
470 if (!dev) {
471 write_unlock_bh(&mrt_lock);
472 return -EADDRNOTAVAIL;
473 }
474
14fb64e1 475#ifdef CONFIG_IPV6_PIMSM_V2
950d5704
BT
476 if (vifi == init_net.ipv6.mroute_reg_vif_num)
477 init_net.ipv6.mroute_reg_vif_num = -1;
14fb64e1
YH
478#endif
479
4e16880c 480 if (vifi + 1 == init_net.ipv6.maxvif) {
7bc570c8
YH
481 int tmp;
482 for (tmp = vifi - 1; tmp >= 0; tmp--) {
4e16880c 483 if (MIF_EXISTS(&init_net, tmp))
7bc570c8
YH
484 break;
485 }
4e16880c 486 init_net.ipv6.maxvif = tmp + 1;
7bc570c8
YH
487 }
488
489 write_unlock_bh(&mrt_lock);
490
491 dev_set_allmulti(dev, -1);
492
493 if (v->flags & MIFF_REGISTER)
494 unregister_netdevice(dev);
495
496 dev_put(dev);
497 return 0;
498}
499
58701ad4
BT
500static inline void ip6mr_cache_free(struct mfc6_cache *c)
501{
502 release_net(mfc6_net(c));
503 kmem_cache_free(mrt_cachep, c);
504}
505
7bc570c8
YH
506/* Destroy an unresolved cache entry, killing queued skbs
507 and reporting error to netlink readers.
508 */
509
510static void ip6mr_destroy_unres(struct mfc6_cache *c)
511{
512 struct sk_buff *skb;
513
4045e57c 514 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
515
516 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
517 if (ipv6_hdr(skb)->version == 0) {
518 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
519 nlh->nlmsg_type = NLMSG_ERROR;
520 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
521 skb_trim(skb, nlh->nlmsg_len);
522 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
523 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
524 } else
525 kfree_skb(skb);
526 }
527
58701ad4 528 ip6mr_cache_free(c);
7bc570c8
YH
529}
530
531
532/* Single timer process for all the unresolved queue. */
533
534static void ipmr_do_expire_process(unsigned long dummy)
535{
536 unsigned long now = jiffies;
537 unsigned long expires = 10 * HZ;
538 struct mfc6_cache *c, **cp;
539
540 cp = &mfc_unres_queue;
541
542 while ((c = *cp) != NULL) {
543 if (time_after(c->mfc_un.unres.expires, now)) {
544 /* not yet... */
545 unsigned long interval = c->mfc_un.unres.expires - now;
546 if (interval < expires)
547 expires = interval;
548 cp = &c->next;
549 continue;
550 }
551
552 *cp = c->next;
553 ip6mr_destroy_unres(c);
554 }
555
4045e57c 556 if (mfc_unres_queue != NULL)
7bc570c8
YH
557 mod_timer(&ipmr_expire_timer, jiffies + expires);
558}
559
560static void ipmr_expire_process(unsigned long dummy)
561{
562 if (!spin_trylock(&mfc_unres_lock)) {
563 mod_timer(&ipmr_expire_timer, jiffies + 1);
564 return;
565 }
566
4045e57c 567 if (mfc_unres_queue != NULL)
7bc570c8
YH
568 ipmr_do_expire_process(dummy);
569
570 spin_unlock(&mfc_unres_lock);
571}
572
573/* Fill oifs list. It is called under write locked mrt_lock. */
574
575static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
576{
577 int vifi;
578
6ac7eb08 579 cache->mfc_un.res.minvif = MAXMIFS;
7bc570c8 580 cache->mfc_un.res.maxvif = 0;
6ac7eb08 581 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
7bc570c8 582
4e16880c
BT
583 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
584 if (MIF_EXISTS(&init_net, vifi) &&
585 ttls[vifi] && ttls[vifi] < 255) {
7bc570c8
YH
586 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
587 if (cache->mfc_un.res.minvif > vifi)
588 cache->mfc_un.res.minvif = vifi;
589 if (cache->mfc_un.res.maxvif <= vifi)
590 cache->mfc_un.res.maxvif = vifi + 1;
591 }
592 }
593}
594
595static int mif6_add(struct mif6ctl *vifc, int mrtsock)
596{
597 int vifi = vifc->mif6c_mifi;
4e16880c 598 struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
7bc570c8 599 struct net_device *dev;
5ae7b444 600 int err;
7bc570c8
YH
601
602 /* Is vif busy ? */
4e16880c 603 if (MIF_EXISTS(&init_net, vifi))
7bc570c8
YH
604 return -EADDRINUSE;
605
606 switch (vifc->mif6c_flags) {
14fb64e1
YH
607#ifdef CONFIG_IPV6_PIMSM_V2
608 case MIFF_REGISTER:
609 /*
610 * Special Purpose VIF in PIM
611 * All the packets will be sent to the daemon
612 */
950d5704 613 if (init_net.ipv6.mroute_reg_vif_num >= 0)
14fb64e1
YH
614 return -EADDRINUSE;
615 dev = ip6mr_reg_vif();
616 if (!dev)
617 return -ENOBUFS;
5ae7b444
WC
618 err = dev_set_allmulti(dev, 1);
619 if (err) {
620 unregister_netdevice(dev);
7af3db78 621 dev_put(dev);
5ae7b444
WC
622 return err;
623 }
14fb64e1
YH
624 break;
625#endif
7bc570c8
YH
626 case 0:
627 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
628 if (!dev)
629 return -EADDRNOTAVAIL;
5ae7b444 630 err = dev_set_allmulti(dev, 1);
7af3db78
WC
631 if (err) {
632 dev_put(dev);
5ae7b444 633 return err;
7af3db78 634 }
7bc570c8
YH
635 break;
636 default:
637 return -EINVAL;
638 }
639
7bc570c8
YH
640 /*
641 * Fill in the VIF structures
642 */
643 v->rate_limit = vifc->vifc_rate_limit;
644 v->flags = vifc->mif6c_flags;
645 if (!mrtsock)
646 v->flags |= VIFF_STATIC;
647 v->threshold = vifc->vifc_threshold;
648 v->bytes_in = 0;
649 v->bytes_out = 0;
650 v->pkt_in = 0;
651 v->pkt_out = 0;
652 v->link = dev->ifindex;
653 if (v->flags & MIFF_REGISTER)
654 v->link = dev->iflink;
655
656 /* And finish update writing critical data */
657 write_lock_bh(&mrt_lock);
7bc570c8 658 v->dev = dev;
14fb64e1
YH
659#ifdef CONFIG_IPV6_PIMSM_V2
660 if (v->flags & MIFF_REGISTER)
950d5704 661 init_net.ipv6.mroute_reg_vif_num = vifi;
14fb64e1 662#endif
4e16880c
BT
663 if (vifi + 1 > init_net.ipv6.maxvif)
664 init_net.ipv6.maxvif = vifi + 1;
7bc570c8
YH
665 write_unlock_bh(&mrt_lock);
666 return 0;
667}
668
669static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
670{
671 int line = MFC6_HASH(mcastgrp, origin);
672 struct mfc6_cache *c;
673
4a6258a0 674 for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
7bc570c8
YH
675 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
676 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
677 break;
678 }
679 return c;
680}
681
682/*
683 * Allocate a multicast cache entry
684 */
58701ad4 685static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
7bc570c8 686{
36cbac59 687 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
7bc570c8
YH
688 if (c == NULL)
689 return NULL;
6ac7eb08 690 c->mfc_un.res.minvif = MAXMIFS;
58701ad4 691 mfc6_net_set(c, net);
7bc570c8
YH
692 return c;
693}
694
58701ad4 695static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
7bc570c8 696{
36cbac59 697 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
7bc570c8
YH
698 if (c == NULL)
699 return NULL;
7bc570c8
YH
700 skb_queue_head_init(&c->mfc_un.unres.unresolved);
701 c->mfc_un.unres.expires = jiffies + 10 * HZ;
58701ad4 702 mfc6_net_set(c, net);
7bc570c8
YH
703 return c;
704}
705
706/*
707 * A cache entry has gone into a resolved state from queued
708 */
709
710static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
711{
712 struct sk_buff *skb;
713
714 /*
715 * Play the pending entries through our router
716 */
717
718 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
719 if (ipv6_hdr(skb)->version == 0) {
720 int err;
721 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
722
723 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
549e028d 724 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
7bc570c8
YH
725 } else {
726 nlh->nlmsg_type = NLMSG_ERROR;
727 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
728 skb_trim(skb, nlh->nlmsg_len);
729 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
730 }
731 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
732 } else
733 ip6_mr_forward(skb, c);
734 }
735}
736
737/*
738 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
739 * expects the following bizarre scheme.
740 *
741 * Called under mrt_lock.
742 */
743
6ac7eb08 744static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
7bc570c8
YH
745{
746 struct sk_buff *skb;
747 struct mrt6msg *msg;
748 int ret;
749
14fb64e1
YH
750#ifdef CONFIG_IPV6_PIMSM_V2
751 if (assert == MRT6MSG_WHOLEPKT)
752 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
753 +sizeof(*msg));
754 else
755#endif
756 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
7bc570c8
YH
757
758 if (!skb)
759 return -ENOBUFS;
760
761 /* I suppose that internal messages
762 * do not require checksums */
763
764 skb->ip_summed = CHECKSUM_UNNECESSARY;
765
14fb64e1
YH
766#ifdef CONFIG_IPV6_PIMSM_V2
767 if (assert == MRT6MSG_WHOLEPKT) {
768 /* Ugly, but we have no choice with this interface.
769 Duplicate old header, fix length etc.
770 And all this only to mangle msg->im6_msgtype and
771 to set msg->im6_mbz to "mbz" :-)
772 */
773 skb_push(skb, -skb_network_offset(pkt));
774
775 skb_push(skb, sizeof(*msg));
776 skb_reset_transport_header(skb);
777 msg = (struct mrt6msg *)skb_transport_header(skb);
778 msg->im6_mbz = 0;
779 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
950d5704 780 msg->im6_mif = init_net.ipv6.mroute_reg_vif_num;
14fb64e1
YH
781 msg->im6_pad = 0;
782 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
783 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
784
785 skb->ip_summed = CHECKSUM_UNNECESSARY;
786 } else
787#endif
788 {
7bc570c8
YH
789 /*
790 * Copy the IP header
791 */
792
793 skb_put(skb, sizeof(struct ipv6hdr));
794 skb_reset_network_header(skb);
795 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
796
797 /*
798 * Add our header
799 */
800 skb_put(skb, sizeof(*msg));
801 skb_reset_transport_header(skb);
802 msg = (struct mrt6msg *)skb_transport_header(skb);
803
804 msg->im6_mbz = 0;
805 msg->im6_msgtype = assert;
6ac7eb08 806 msg->im6_mif = mifi;
7bc570c8
YH
807 msg->im6_pad = 0;
808 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
809 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
810
811 skb->dst = dst_clone(pkt->dst);
812 skb->ip_summed = CHECKSUM_UNNECESSARY;
813
814 skb_pull(skb, sizeof(struct ipv6hdr));
14fb64e1 815 }
7bc570c8 816
bd91b8bf 817 if (init_net.ipv6.mroute6_sk == NULL) {
7bc570c8
YH
818 kfree_skb(skb);
819 return -EINVAL;
820 }
821
822 /*
823 * Deliver to user space multicast routing algorithms
824 */
bd91b8bf
BT
825 ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
826 if (ret < 0) {
7bc570c8
YH
827 if (net_ratelimit())
828 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
829 kfree_skb(skb);
830 }
831
832 return ret;
833}
834
835/*
836 * Queue a packet for resolution. It gets locked cache entry!
837 */
838
839static int
6ac7eb08 840ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
7bc570c8
YH
841{
842 int err;
843 struct mfc6_cache *c;
844
845 spin_lock_bh(&mfc_unres_lock);
846 for (c = mfc_unres_queue; c; c = c->next) {
4045e57c
BT
847 if (net_eq(mfc6_net(c), &init_net) &&
848 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
7bc570c8
YH
849 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
850 break;
851 }
852
853 if (c == NULL) {
854 /*
855 * Create a new entry if allowable
856 */
857
4045e57c 858 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
58701ad4 859 (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
7bc570c8
YH
860 spin_unlock_bh(&mfc_unres_lock);
861
862 kfree_skb(skb);
863 return -ENOBUFS;
864 }
865
866 /*
867 * Fill in the new cache entry
868 */
869 c->mf6c_parent = -1;
870 c->mf6c_origin = ipv6_hdr(skb)->saddr;
871 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
872
873 /*
874 * Reflect first query at pim6sd
875 */
6ac7eb08 876 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
7bc570c8
YH
877 /* If the report failed throw the cache entry
878 out - Brad Parker
879 */
880 spin_unlock_bh(&mfc_unres_lock);
881
58701ad4 882 ip6mr_cache_free(c);
7bc570c8
YH
883 kfree_skb(skb);
884 return err;
885 }
886
4045e57c 887 atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
888 c->next = mfc_unres_queue;
889 mfc_unres_queue = c;
890
891 ipmr_do_expire_process(1);
892 }
893
894 /*
895 * See if we can append the packet
896 */
897 if (c->mfc_un.unres.unresolved.qlen > 3) {
898 kfree_skb(skb);
899 err = -ENOBUFS;
900 } else {
901 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
902 err = 0;
903 }
904
905 spin_unlock_bh(&mfc_unres_lock);
906 return err;
907}
908
909/*
910 * MFC6 cache manipulation by user space
911 */
912
913static int ip6mr_mfc_delete(struct mf6cctl *mfc)
914{
915 int line;
916 struct mfc6_cache *c, **cp;
917
918 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
919
4a6258a0
BT
920 for (cp = &init_net.ipv6.mfc6_cache_array[line];
921 (c = *cp) != NULL; cp = &c->next) {
7bc570c8
YH
922 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
923 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
924 write_lock_bh(&mrt_lock);
925 *cp = c->next;
926 write_unlock_bh(&mrt_lock);
927
58701ad4 928 ip6mr_cache_free(c);
7bc570c8
YH
929 return 0;
930 }
931 }
932 return -ENOENT;
933}
934
935static int ip6mr_device_event(struct notifier_block *this,
936 unsigned long event, void *ptr)
937{
938 struct net_device *dev = ptr;
939 struct mif_device *v;
940 int ct;
941
721499e8 942 if (!net_eq(dev_net(dev), &init_net))
7bc570c8
YH
943 return NOTIFY_DONE;
944
945 if (event != NETDEV_UNREGISTER)
946 return NOTIFY_DONE;
947
4e16880c
BT
948 v = &init_net.ipv6.vif6_table[0];
949 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
7bc570c8
YH
950 if (v->dev == dev)
951 mif6_delete(ct);
952 }
953 return NOTIFY_DONE;
954}
955
956static struct notifier_block ip6_mr_notifier = {
957 .notifier_call = ip6mr_device_event
958};
959
960/*
961 * Setup for IP multicast routing
962 */
963
4e16880c
BT
964static int __net_init ip6mr_net_init(struct net *net)
965{
966 int err = 0;
4e16880c
BT
967 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
968 GFP_KERNEL);
969 if (!net->ipv6.vif6_table) {
970 err = -ENOMEM;
971 goto fail;
972 }
4a6258a0
BT
973
974 /* Forwarding cache */
975 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
976 sizeof(struct mfc6_cache *),
977 GFP_KERNEL);
978 if (!net->ipv6.mfc6_cache_array) {
979 err = -ENOMEM;
980 goto fail_mfc6_cache;
981 }
950d5704
BT
982
983#ifdef CONFIG_IPV6_PIMSM_V2
984 net->ipv6.mroute_reg_vif_num = -1;
985#endif
4a6258a0
BT
986 return 0;
987
988fail_mfc6_cache:
989 kfree(net->ipv6.vif6_table);
4e16880c
BT
990fail:
991 return err;
992}
993
994static void __net_exit ip6mr_net_exit(struct net *net)
995{
4a6258a0 996 kfree(net->ipv6.mfc6_cache_array);
4e16880c
BT
997 kfree(net->ipv6.vif6_table);
998}
999
1000static struct pernet_operations ip6mr_net_ops = {
1001 .init = ip6mr_net_init,
1002 .exit = ip6mr_net_exit,
1003};
1004
623d1a1a 1005int __init ip6_mr_init(void)
7bc570c8 1006{
623d1a1a
WC
1007 int err;
1008
7bc570c8
YH
1009 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1010 sizeof(struct mfc6_cache),
1011 0, SLAB_HWCACHE_ALIGN,
1012 NULL);
1013 if (!mrt_cachep)
623d1a1a 1014 return -ENOMEM;
7bc570c8 1015
4e16880c
BT
1016 err = register_pernet_subsys(&ip6mr_net_ops);
1017 if (err)
1018 goto reg_pernet_fail;
1019
7bc570c8 1020 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
623d1a1a
WC
1021 err = register_netdevice_notifier(&ip6_mr_notifier);
1022 if (err)
1023 goto reg_notif_fail;
1024#ifdef CONFIG_PROC_FS
1025 err = -ENOMEM;
1026 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1027 goto proc_vif_fail;
1028 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
1029 0, &ip6mr_mfc_fops))
1030 goto proc_cache_fail;
1031#endif
1032 return 0;
7bc570c8 1033#ifdef CONFIG_PROC_FS
623d1a1a
WC
1034proc_cache_fail:
1035 proc_net_remove(&init_net, "ip6_mr_vif");
87b30a65
BT
1036proc_vif_fail:
1037 unregister_netdevice_notifier(&ip6_mr_notifier);
7bc570c8 1038#endif
87b30a65
BT
1039reg_notif_fail:
1040 del_timer(&ipmr_expire_timer);
4e16880c
BT
1041 unregister_pernet_subsys(&ip6mr_net_ops);
1042reg_pernet_fail:
87b30a65 1043 kmem_cache_destroy(mrt_cachep);
623d1a1a 1044 return err;
7bc570c8
YH
1045}
1046
623d1a1a
WC
1047void ip6_mr_cleanup(void)
1048{
1049#ifdef CONFIG_PROC_FS
1050 proc_net_remove(&init_net, "ip6_mr_cache");
1051 proc_net_remove(&init_net, "ip6_mr_vif");
1052#endif
1053 unregister_netdevice_notifier(&ip6_mr_notifier);
1054 del_timer(&ipmr_expire_timer);
4e16880c 1055 unregister_pernet_subsys(&ip6mr_net_ops);
623d1a1a
WC
1056 kmem_cache_destroy(mrt_cachep);
1057}
7bc570c8
YH
1058
1059static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1060{
1061 int line;
1062 struct mfc6_cache *uc, *c, **cp;
6ac7eb08 1063 unsigned char ttls[MAXMIFS];
7bc570c8
YH
1064 int i;
1065
6ac7eb08
RR
1066 memset(ttls, 255, MAXMIFS);
1067 for (i = 0; i < MAXMIFS; i++) {
7bc570c8
YH
1068 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1069 ttls[i] = 1;
1070
1071 }
1072
1073 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1074
4a6258a0
BT
1075 for (cp = &init_net.ipv6.mfc6_cache_array[line];
1076 (c = *cp) != NULL; cp = &c->next) {
7bc570c8
YH
1077 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1078 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1079 break;
1080 }
1081
1082 if (c != NULL) {
1083 write_lock_bh(&mrt_lock);
1084 c->mf6c_parent = mfc->mf6cc_parent;
1085 ip6mr_update_thresholds(c, ttls);
1086 if (!mrtsock)
1087 c->mfc_flags |= MFC_STATIC;
1088 write_unlock_bh(&mrt_lock);
1089 return 0;
1090 }
1091
1092 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1093 return -EINVAL;
1094
58701ad4 1095 c = ip6mr_cache_alloc(&init_net);
7bc570c8
YH
1096 if (c == NULL)
1097 return -ENOMEM;
1098
1099 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1100 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1101 c->mf6c_parent = mfc->mf6cc_parent;
1102 ip6mr_update_thresholds(c, ttls);
1103 if (!mrtsock)
1104 c->mfc_flags |= MFC_STATIC;
1105
1106 write_lock_bh(&mrt_lock);
4a6258a0
BT
1107 c->next = init_net.ipv6.mfc6_cache_array[line];
1108 init_net.ipv6.mfc6_cache_array[line] = c;
7bc570c8
YH
1109 write_unlock_bh(&mrt_lock);
1110
1111 /*
1112 * Check to see if we resolved a queued list. If so we
1113 * need to send on the frames and tidy up.
1114 */
1115 spin_lock_bh(&mfc_unres_lock);
1116 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1117 cp = &uc->next) {
4045e57c
BT
1118 if (net_eq(mfc6_net(uc), &init_net) &&
1119 ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
7bc570c8
YH
1120 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1121 *cp = uc->next;
4045e57c 1122 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
1123 break;
1124 }
1125 }
4045e57c
BT
1126 if (mfc_unres_queue == NULL)
1127 del_timer(&ipmr_expire_timer);
7bc570c8
YH
1128 spin_unlock_bh(&mfc_unres_lock);
1129
1130 if (uc) {
1131 ip6mr_cache_resolve(uc, c);
58701ad4 1132 ip6mr_cache_free(uc);
7bc570c8
YH
1133 }
1134 return 0;
1135}
1136
1137/*
1138 * Close the multicast socket, and clear the vif tables etc
1139 */
1140
/*
 *	Close the multicast socket, and clear the vif tables etc
 *
 *	Tears down all non-static state of the (init_net) multicast router:
 *	every VIF without VIFF_STATIC, every cache entry without MFC_STATIC,
 *	and all pending unresolved entries belonging to init_net.
 *	Caller holds RTNL (invoked from ip6mr_sk_done under rtnl_lock).
 */
static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv6.maxvif; i++) {
		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		struct mfc6_cache *c, **cp;

		cp = &init_net.ipv6.mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			/* Static entries survive socket close */
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* Unlink under mrt_lock; free outside the lock */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			/* The unresolved queue is shared across netns;
			 * only reap entries belonging to init_net. */
			if (!net_eq(mfc6_net(c), &init_net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;
			ip6mr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1189
1190static int ip6mr_sk_init(struct sock *sk)
1191{
1192 int err = 0;
1193
1194 rtnl_lock();
1195 write_lock_bh(&mrt_lock);
bd91b8bf
BT
1196 if (likely(init_net.ipv6.mroute6_sk == NULL))
1197 init_net.ipv6.mroute6_sk = sk;
7bc570c8
YH
1198 else
1199 err = -EADDRINUSE;
1200 write_unlock_bh(&mrt_lock);
1201
1202 rtnl_unlock();
1203
1204 return err;
1205}
1206
1207int ip6mr_sk_done(struct sock *sk)
1208{
1209 int err = 0;
1210
1211 rtnl_lock();
bd91b8bf 1212 if (sk == init_net.ipv6.mroute6_sk) {
7bc570c8 1213 write_lock_bh(&mrt_lock);
bd91b8bf 1214 init_net.ipv6.mroute6_sk = NULL;
7bc570c8
YH
1215 write_unlock_bh(&mrt_lock);
1216
1217 mroute_clean_tables(sk);
1218 } else
1219 err = -EACCES;
1220 rtnl_unlock();
1221
1222 return err;
1223}
1224
1225/*
1226 * Socket options and virtual interface manipulation. The whole
1227 * virtual interface system is a complete heap, but unfortunately
1228 * that's how BSD mrouted happens to think. Maybe one day with a proper
1229 * MOSPF/PIM router set up we can clean this up.
1230 */
1231
/*
 *	setsockopt() handler for the MRT6_* multicast-routing options.
 *
 *	@sk:      socket issuing the option (must be the registered mroute6
 *	          socket, or have CAP_NET_ADMIN, for anything but MRT6_INIT)
 *	@optname: MRT6_INIT/DONE/ADD_MIF/DEL_MIF/ADD_MFC/DEL_MFC/ASSERT/PIM
 *	@optval:  user-space option payload
 *	@optlen:  payload length, validated per option
 *
 *	Returns 0 or a negative errno.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	/* Everything except MRT6_INIT is restricted to the control
	 * socket or a privileged caller. */
	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		/* Only a raw ICMPv6 socket may become the mroute socket */
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		init_net.ipv6.mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Toggling PIM also (un)registers the IPPROTO_PIM handler
		 * and forces assert mode to match. */
		if (v != init_net.ipv6.mroute_do_pim) {
			init_net.ipv6.mroute_do_pim = v;
			init_net.ipv6.mroute_do_assert = v;
			if (init_net.ipv6.mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1343
1344/*
1345 * Getsock opt support for the multicast routing system.
1346 */
1347
1348int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1349 int __user *optlen)
1350{
1351 int olr;
1352 int val;
1353
1354 switch (optname) {
1355 case MRT6_VERSION:
1356 val = 0x0305;
1357 break;
14fb64e1
YH
1358#ifdef CONFIG_IPV6_PIMSM_V2
1359 case MRT6_PIM:
a21f3f99 1360 val = init_net.ipv6.mroute_do_pim;
14fb64e1
YH
1361 break;
1362#endif
1363 case MRT6_ASSERT:
a21f3f99 1364 val = init_net.ipv6.mroute_do_assert;
14fb64e1 1365 break;
7bc570c8
YH
1366 default:
1367 return -ENOPROTOOPT;
1368 }
1369
1370 if (get_user(olr, optlen))
1371 return -EFAULT;
1372
1373 olr = min_t(int, olr, sizeof(int));
1374 if (olr < 0)
1375 return -EINVAL;
1376
1377 if (put_user(olr, optlen))
1378 return -EFAULT;
1379 if (copy_to_user(optval, &val, olr))
1380 return -EFAULT;
1381 return 0;
1382}
1383
1384/*
1385 * The IP multicast ioctl support routines.
1386 */
1387
/*
 *	ioctl() support for the multicast routing system.
 *
 *	SIOCGETMIFCNT_IN6: per-MIF packet/byte counters.
 *	SIOCGETSGCNT_IN6:  per-(S,G) cache entry counters.
 *	Returns 0, -EFAULT, -EINVAL, -EADDRNOTAVAIL or -ENOIOCTLCMD.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= init_net.ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(&init_net, vr.mifi)) {
			/* Snapshot counters under mrt_lock, then drop the
			 * lock before touching user memory. */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1438
1439
/*
 *	Netfilter FORWARD-hook continuation for ip6mr_forward2():
 *	bump the forwarded-datagram counter and hand the skb to
 *	dst_output() for transmission.
 */
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}
1446
1447/*
1448 * Processing handlers for ip6mr_forward
1449 */
1450
/*
 *	Transmit one multicast packet out of a single MIF.
 *
 *	Consumes @skb in all cases (transmitted via NF_HOOK or freed).
 *	Register VIFs (PIM) get the whole packet reported to user space
 *	instead of being transmitted.  Always returns 0.
 */
static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		/* PIM register interface: account the packet and punt the
		 * whole thing to the daemon, never onto the wire. */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route the copy out of the target MIF's link */
	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1523
1524static int ip6mr_find_vif(struct net_device *dev)
1525{
1526 int ct;
4e16880c
BT
1527 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1528 if (init_net.ipv6.vif6_table[ct].dev == dev)
7bc570c8
YH
1529 break;
1530 }
1531 return ct;
1532}
1533
/*
 *	Forward a multicast packet according to a resolved cache entry.
 *
 *	Checks RPF (the packet must arrive on the entry's parent MIF,
 *	otherwise a PIM assert may be generated), then clones the skb to
 *	every output MIF whose TTL threshold is passed.  Consumes @skb.
 *	Always returns 0.
 */
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (init_net.ipv6.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    /* Rate-limit asserts to one per MFC_ASSERT_THRESH */
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	init_net.ipv6.vif6_table[vif].pkt_in++;
	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			/* Clone for all earlier targets; the original skb
			 * is sent (without a clone) to the last one. */
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
1593
1594
1595/*
1596 * Multicast packets for forwarding arrive here
1597 */
1598
/*
 *	Multicast packets for forwarding arrive here
 *
 *	Entry point from the IPv6 receive path: look up an (S,G) cache
 *	entry and forward, or queue the packet as unresolved so user
 *	space can install a route.  Consumes @skb on all paths.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			/* Queue for resolution; unresolved handling
			 * takes ownership of the skb. */
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
1630
1631
/*
 *	Fill an rtnetlink reply with the multicast route described by @c:
 *	RTA_IIF for the parent interface plus an RTA_MULTIPATH nexthop
 *	list for every output MIF with a TTL threshold below 255.
 *
 *	Called with mrt_lock held by ip6mr_get_route().  Returns 1 on
 *	success or -EMSGSIZE if @skb lacks tailroom (skb is trimmed back).
 */
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			/* NOTE(review): vif6_table[ct].dev is dereferenced
			 * without a NULL check here, unlike the parent dev
			 * above — presumably a live ttls[] slot implies a
			 * registered device; confirm against mif6_delete(). */
			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	/* Undo everything written into the skb since entry */
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
1666
/*
 *	RTM_GETROUTE support: describe the multicast route for the
 *	(src, dst) pair carried by skb->dst.
 *
 *	If no cache entry exists and @nowait is clear, a minimal fake
 *	IPv6 header is built in a fresh skb and queued as unresolved so
 *	the routing daemon can resolve it.  Returns the ip6mr_fill_mroute()
 *	result, or -EAGAIN/-ENODEV/-ENOMEM on the unresolved paths.
 */
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Build a skeleton IPv6 header carrying only the
		 * addresses — enough for the resolution machinery. */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1730