]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/ip6mr.c
netns: ip6mr: declare ip6mr /proc/net entries per-namespace
[net-next-2.6.git] / net / ipv6 / ip6mr.c
CommitLineData
7bc570c8
YH
1/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
7bc570c8
YH
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
7bc570c8
YH
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
7bc570c8 35#include <linux/init.h>
7bc570c8
YH
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
7bc570c8 39#include <net/raw.h>
7bc570c8
YH
40#include <linux/notifier.h>
41#include <linux/if_arp.h>
7bc570c8
YH
42#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
14fb64e1 48#include <linux/pim.h>
7bc570c8
YH
49#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
7bc570c8
YH
52/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
4e16880c 62#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
7bc570c8 63
7bc570c8 64static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
7bc570c8
YH
65
66/* Special spinlock for queue of unresolved entries */
67static DEFINE_SPINLOCK(mfc_unres_lock);
68
69/* We return to original Alan's scheme. Hash table of resolved
70 entries is changed only in process context and protected
71 with weak lock mrt_lock. Queue of unresolved entries is protected
72 with strong spinlock mfc_unres_lock.
73
74 In this case data path is free of exclusive locks at all.
75 */
76
77static struct kmem_cache *mrt_cachep __read_mostly;
78
79static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
6ac7eb08 80static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
7bc570c8
YH
81static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
82
14fb64e1
YH
83#ifdef CONFIG_IPV6_PIMSM_V2
84static struct inet6_protocol pim6_protocol;
85#endif
86
7bc570c8
YH
87static struct timer_list ipmr_expire_timer;
88
89
90#ifdef CONFIG_PROC_FS
91
92struct ipmr_mfc_iter {
8b90fc7e 93 struct seq_net_private p;
7bc570c8
YH
94 struct mfc6_cache **cache;
95 int ct;
96};
97
98
8b90fc7e
BT
99static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
100 struct ipmr_mfc_iter *it, loff_t pos)
7bc570c8
YH
101{
102 struct mfc6_cache *mfc;
103
8b90fc7e 104 it->cache = net->ipv6.mfc6_cache_array;
7bc570c8 105 read_lock(&mrt_lock);
4a6258a0 106 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
8b90fc7e 107 for (mfc = net->ipv6.mfc6_cache_array[it->ct];
4a6258a0 108 mfc; mfc = mfc->next)
7bc570c8
YH
109 if (pos-- == 0)
110 return mfc;
111 read_unlock(&mrt_lock);
112
113 it->cache = &mfc_unres_queue;
114 spin_lock_bh(&mfc_unres_lock);
115 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
8b90fc7e
BT
116 if (net_eq(mfc6_net(mfc), net) &&
117 pos-- == 0)
7bc570c8
YH
118 return mfc;
119 spin_unlock_bh(&mfc_unres_lock);
120
121 it->cache = NULL;
122 return NULL;
123}
124
125
126
127
128/*
129 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
130 */
131
132struct ipmr_vif_iter {
8b90fc7e 133 struct seq_net_private p;
7bc570c8
YH
134 int ct;
135};
136
8b90fc7e
BT
137static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
138 struct ipmr_vif_iter *iter,
7bc570c8
YH
139 loff_t pos)
140{
8b90fc7e
BT
141 for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
142 if (!MIF_EXISTS(net, iter->ct))
7bc570c8
YH
143 continue;
144 if (pos-- == 0)
8b90fc7e 145 return &net->ipv6.vif6_table[iter->ct];
7bc570c8
YH
146 }
147 return NULL;
148}
149
150static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
151 __acquires(mrt_lock)
152{
8b90fc7e
BT
153 struct net *net = seq_file_net(seq);
154
7bc570c8 155 read_lock(&mrt_lock);
8b90fc7e
BT
156 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
157 : SEQ_START_TOKEN;
7bc570c8
YH
158}
159
160static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
161{
162 struct ipmr_vif_iter *iter = seq->private;
8b90fc7e 163 struct net *net = seq_file_net(seq);
7bc570c8
YH
164
165 ++*pos;
166 if (v == SEQ_START_TOKEN)
8b90fc7e 167 return ip6mr_vif_seq_idx(net, iter, 0);
7bc570c8 168
8b90fc7e
BT
169 while (++iter->ct < net->ipv6.maxvif) {
170 if (!MIF_EXISTS(net, iter->ct))
7bc570c8 171 continue;
8b90fc7e 172 return &net->ipv6.vif6_table[iter->ct];
7bc570c8
YH
173 }
174 return NULL;
175}
176
177static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
178 __releases(mrt_lock)
179{
180 read_unlock(&mrt_lock);
181}
182
183static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
184{
8b90fc7e
BT
185 struct net *net = seq_file_net(seq);
186
7bc570c8
YH
187 if (v == SEQ_START_TOKEN) {
188 seq_puts(seq,
189 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
190 } else {
191 const struct mif_device *vif = v;
192 const char *name = vif->dev ? vif->dev->name : "none";
193
194 seq_printf(seq,
d430a227 195 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
8b90fc7e 196 vif - net->ipv6.vif6_table,
7bc570c8
YH
197 name, vif->bytes_in, vif->pkt_in,
198 vif->bytes_out, vif->pkt_out,
199 vif->flags);
200 }
201 return 0;
202}
203
204static struct seq_operations ip6mr_vif_seq_ops = {
205 .start = ip6mr_vif_seq_start,
206 .next = ip6mr_vif_seq_next,
207 .stop = ip6mr_vif_seq_stop,
208 .show = ip6mr_vif_seq_show,
209};
210
211static int ip6mr_vif_open(struct inode *inode, struct file *file)
212{
8b90fc7e
BT
213 return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
214 sizeof(struct ipmr_vif_iter));
7bc570c8
YH
215}
216
217static struct file_operations ip6mr_vif_fops = {
218 .owner = THIS_MODULE,
219 .open = ip6mr_vif_open,
220 .read = seq_read,
221 .llseek = seq_lseek,
8b90fc7e 222 .release = seq_release_net,
7bc570c8
YH
223};
224
225static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
226{
8b90fc7e
BT
227 struct net *net = seq_file_net(seq);
228
229 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
230 : SEQ_START_TOKEN;
7bc570c8
YH
231}
232
233static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
234{
235 struct mfc6_cache *mfc = v;
236 struct ipmr_mfc_iter *it = seq->private;
8b90fc7e 237 struct net *net = seq_file_net(seq);
7bc570c8
YH
238
239 ++*pos;
240
241 if (v == SEQ_START_TOKEN)
8b90fc7e 242 return ipmr_mfc_seq_idx(net, seq->private, 0);
7bc570c8
YH
243
244 if (mfc->next)
245 return mfc->next;
246
247 if (it->cache == &mfc_unres_queue)
248 goto end_of_list;
249
8b90fc7e 250 BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
7bc570c8 251
4a6258a0 252 while (++it->ct < MFC6_LINES) {
8b90fc7e 253 mfc = net->ipv6.mfc6_cache_array[it->ct];
7bc570c8
YH
254 if (mfc)
255 return mfc;
256 }
257
258 /* exhausted cache_array, show unresolved */
259 read_unlock(&mrt_lock);
260 it->cache = &mfc_unres_queue;
261 it->ct = 0;
262
263 spin_lock_bh(&mfc_unres_lock);
264 mfc = mfc_unres_queue;
265 if (mfc)
266 return mfc;
267
268 end_of_list:
269 spin_unlock_bh(&mfc_unres_lock);
270 it->cache = NULL;
271
272 return NULL;
273}
274
275static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
276{
277 struct ipmr_mfc_iter *it = seq->private;
8b90fc7e 278 struct net *net = seq_file_net(seq);
7bc570c8
YH
279
280 if (it->cache == &mfc_unres_queue)
281 spin_unlock_bh(&mfc_unres_lock);
8b90fc7e 282 else if (it->cache == net->ipv6.mfc6_cache_array)
7bc570c8
YH
283 read_unlock(&mrt_lock);
284}
285
286static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
287{
288 int n;
8b90fc7e 289 struct net *net = seq_file_net(seq);
7bc570c8
YH
290
291 if (v == SEQ_START_TOKEN) {
292 seq_puts(seq,
293 "Group "
294 "Origin "
295 "Iif Pkts Bytes Wrong Oifs\n");
296 } else {
297 const struct mfc6_cache *mfc = v;
298 const struct ipmr_mfc_iter *it = seq->private;
299
999890b2 300 seq_printf(seq, "%pI6 %pI6 %-3hd",
0c6ce78a 301 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
1ea472e2 302 mfc->mf6c_parent);
7bc570c8
YH
303
304 if (it->cache != &mfc_unres_queue) {
1ea472e2
BT
305 seq_printf(seq, " %8lu %8lu %8lu",
306 mfc->mfc_un.res.pkt,
307 mfc->mfc_un.res.bytes,
308 mfc->mfc_un.res.wrong_if);
7bc570c8
YH
309 for (n = mfc->mfc_un.res.minvif;
310 n < mfc->mfc_un.res.maxvif; n++) {
8b90fc7e 311 if (MIF_EXISTS(net, n) &&
7bc570c8
YH
312 mfc->mfc_un.res.ttls[n] < 255)
313 seq_printf(seq,
314 " %2d:%-3d",
315 n, mfc->mfc_un.res.ttls[n]);
316 }
1ea472e2
BT
317 } else {
318 /* unresolved mfc_caches don't contain
319 * pkt, bytes and wrong_if values
320 */
321 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
7bc570c8
YH
322 }
323 seq_putc(seq, '\n');
324 }
325 return 0;
326}
327
328static struct seq_operations ipmr_mfc_seq_ops = {
329 .start = ipmr_mfc_seq_start,
330 .next = ipmr_mfc_seq_next,
331 .stop = ipmr_mfc_seq_stop,
332 .show = ipmr_mfc_seq_show,
333};
334
335static int ipmr_mfc_open(struct inode *inode, struct file *file)
336{
8b90fc7e
BT
337 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
338 sizeof(struct ipmr_mfc_iter));
7bc570c8
YH
339}
340
341static struct file_operations ip6mr_mfc_fops = {
342 .owner = THIS_MODULE,
343 .open = ipmr_mfc_open,
344 .read = seq_read,
345 .llseek = seq_lseek,
8b90fc7e 346 .release = seq_release_net,
7bc570c8
YH
347};
348#endif
349
14fb64e1 350#ifdef CONFIG_IPV6_PIMSM_V2
14fb64e1
YH
351
352static int pim6_rcv(struct sk_buff *skb)
353{
354 struct pimreghdr *pim;
355 struct ipv6hdr *encap;
356 struct net_device *reg_dev = NULL;
950d5704 357 int reg_vif_num = init_net.ipv6.mroute_reg_vif_num;
14fb64e1
YH
358
359 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
360 goto drop;
361
362 pim = (struct pimreghdr *)skb_transport_header(skb);
363 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
364 (pim->flags & PIM_NULL_REGISTER) ||
365 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
ec6b486f 366 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
14fb64e1
YH
367 goto drop;
368
369 /* check if the inner packet is destined to mcast group */
370 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
371 sizeof(*pim));
372
373 if (!ipv6_addr_is_multicast(&encap->daddr) ||
374 encap->payload_len == 0 ||
375 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
376 goto drop;
377
378 read_lock(&mrt_lock);
379 if (reg_vif_num >= 0)
4e16880c 380 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
14fb64e1
YH
381 if (reg_dev)
382 dev_hold(reg_dev);
383 read_unlock(&mrt_lock);
384
385 if (reg_dev == NULL)
386 goto drop;
387
388 skb->mac_header = skb->network_header;
389 skb_pull(skb, (u8 *)encap - skb->data);
390 skb_reset_network_header(skb);
391 skb->dev = reg_dev;
392 skb->protocol = htons(ETH_P_IP);
393 skb->ip_summed = 0;
394 skb->pkt_type = PACKET_HOST;
395 dst_release(skb->dst);
dc58c78c
PE
396 reg_dev->stats.rx_bytes += skb->len;
397 reg_dev->stats.rx_packets++;
14fb64e1
YH
398 skb->dst = NULL;
399 nf_reset(skb);
400 netif_rx(skb);
401 dev_put(reg_dev);
402 return 0;
403 drop:
404 kfree_skb(skb);
405 return 0;
406}
407
408static struct inet6_protocol pim6_protocol = {
409 .handler = pim6_rcv,
410};
411
412/* Service routines creating virtual interfaces: PIMREG */
413
414static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
415{
416 read_lock(&mrt_lock);
dc58c78c
PE
417 dev->stats.tx_bytes += skb->len;
418 dev->stats.tx_packets++;
950d5704 419 ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
14fb64e1
YH
420 read_unlock(&mrt_lock);
421 kfree_skb(skb);
422 return 0;
423}
424
007c3838
SH
425static const struct net_device_ops reg_vif_netdev_ops = {
426 .ndo_start_xmit = reg_vif_xmit,
427};
428
14fb64e1
YH
429static void reg_vif_setup(struct net_device *dev)
430{
431 dev->type = ARPHRD_PIMREG;
432 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
433 dev->flags = IFF_NOARP;
007c3838 434 dev->netdev_ops = &reg_vif_netdev_ops;
14fb64e1
YH
435 dev->destructor = free_netdev;
436}
437
438static struct net_device *ip6mr_reg_vif(void)
439{
440 struct net_device *dev;
14fb64e1 441
dc58c78c 442 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
14fb64e1
YH
443 if (dev == NULL)
444 return NULL;
445
446 if (register_netdevice(dev)) {
447 free_netdev(dev);
448 return NULL;
449 }
450 dev->iflink = 0;
451
14fb64e1
YH
452 if (dev_open(dev))
453 goto failure;
454
7af3db78 455 dev_hold(dev);
14fb64e1
YH
456 return dev;
457
458failure:
459 /* allow the register to be completed before unregistering. */
460 rtnl_unlock();
461 rtnl_lock();
462
463 unregister_netdevice(dev);
464 return NULL;
465}
466#endif
467
7bc570c8
YH
468/*
469 * Delete a VIF entry
470 */
471
472static int mif6_delete(int vifi)
473{
474 struct mif_device *v;
475 struct net_device *dev;
4e16880c 476 if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
7bc570c8
YH
477 return -EADDRNOTAVAIL;
478
4e16880c 479 v = &init_net.ipv6.vif6_table[vifi];
7bc570c8
YH
480
481 write_lock_bh(&mrt_lock);
482 dev = v->dev;
483 v->dev = NULL;
484
485 if (!dev) {
486 write_unlock_bh(&mrt_lock);
487 return -EADDRNOTAVAIL;
488 }
489
14fb64e1 490#ifdef CONFIG_IPV6_PIMSM_V2
950d5704
BT
491 if (vifi == init_net.ipv6.mroute_reg_vif_num)
492 init_net.ipv6.mroute_reg_vif_num = -1;
14fb64e1
YH
493#endif
494
4e16880c 495 if (vifi + 1 == init_net.ipv6.maxvif) {
7bc570c8
YH
496 int tmp;
497 for (tmp = vifi - 1; tmp >= 0; tmp--) {
4e16880c 498 if (MIF_EXISTS(&init_net, tmp))
7bc570c8
YH
499 break;
500 }
4e16880c 501 init_net.ipv6.maxvif = tmp + 1;
7bc570c8
YH
502 }
503
504 write_unlock_bh(&mrt_lock);
505
506 dev_set_allmulti(dev, -1);
507
508 if (v->flags & MIFF_REGISTER)
509 unregister_netdevice(dev);
510
511 dev_put(dev);
512 return 0;
513}
514
58701ad4
BT
515static inline void ip6mr_cache_free(struct mfc6_cache *c)
516{
517 release_net(mfc6_net(c));
518 kmem_cache_free(mrt_cachep, c);
519}
520
7bc570c8
YH
521/* Destroy an unresolved cache entry, killing queued skbs
522 and reporting error to netlink readers.
523 */
524
525static void ip6mr_destroy_unres(struct mfc6_cache *c)
526{
527 struct sk_buff *skb;
528
4045e57c 529 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
530
531 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
532 if (ipv6_hdr(skb)->version == 0) {
533 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
534 nlh->nlmsg_type = NLMSG_ERROR;
535 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
536 skb_trim(skb, nlh->nlmsg_len);
537 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
538 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
539 } else
540 kfree_skb(skb);
541 }
542
58701ad4 543 ip6mr_cache_free(c);
7bc570c8
YH
544}
545
546
547/* Single timer process for all the unresolved queue. */
548
549static void ipmr_do_expire_process(unsigned long dummy)
550{
551 unsigned long now = jiffies;
552 unsigned long expires = 10 * HZ;
553 struct mfc6_cache *c, **cp;
554
555 cp = &mfc_unres_queue;
556
557 while ((c = *cp) != NULL) {
558 if (time_after(c->mfc_un.unres.expires, now)) {
559 /* not yet... */
560 unsigned long interval = c->mfc_un.unres.expires - now;
561 if (interval < expires)
562 expires = interval;
563 cp = &c->next;
564 continue;
565 }
566
567 *cp = c->next;
568 ip6mr_destroy_unres(c);
569 }
570
4045e57c 571 if (mfc_unres_queue != NULL)
7bc570c8
YH
572 mod_timer(&ipmr_expire_timer, jiffies + expires);
573}
574
575static void ipmr_expire_process(unsigned long dummy)
576{
577 if (!spin_trylock(&mfc_unres_lock)) {
578 mod_timer(&ipmr_expire_timer, jiffies + 1);
579 return;
580 }
581
4045e57c 582 if (mfc_unres_queue != NULL)
7bc570c8
YH
583 ipmr_do_expire_process(dummy);
584
585 spin_unlock(&mfc_unres_lock);
586}
587
588/* Fill oifs list. It is called under write locked mrt_lock. */
589
590static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
591{
592 int vifi;
593
6ac7eb08 594 cache->mfc_un.res.minvif = MAXMIFS;
7bc570c8 595 cache->mfc_un.res.maxvif = 0;
6ac7eb08 596 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
7bc570c8 597
4e16880c
BT
598 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
599 if (MIF_EXISTS(&init_net, vifi) &&
600 ttls[vifi] && ttls[vifi] < 255) {
7bc570c8
YH
601 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
602 if (cache->mfc_un.res.minvif > vifi)
603 cache->mfc_un.res.minvif = vifi;
604 if (cache->mfc_un.res.maxvif <= vifi)
605 cache->mfc_un.res.maxvif = vifi + 1;
606 }
607 }
608}
609
610static int mif6_add(struct mif6ctl *vifc, int mrtsock)
611{
612 int vifi = vifc->mif6c_mifi;
4e16880c 613 struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
7bc570c8 614 struct net_device *dev;
5ae7b444 615 int err;
7bc570c8
YH
616
617 /* Is vif busy ? */
4e16880c 618 if (MIF_EXISTS(&init_net, vifi))
7bc570c8
YH
619 return -EADDRINUSE;
620
621 switch (vifc->mif6c_flags) {
14fb64e1
YH
622#ifdef CONFIG_IPV6_PIMSM_V2
623 case MIFF_REGISTER:
624 /*
625 * Special Purpose VIF in PIM
626 * All the packets will be sent to the daemon
627 */
950d5704 628 if (init_net.ipv6.mroute_reg_vif_num >= 0)
14fb64e1
YH
629 return -EADDRINUSE;
630 dev = ip6mr_reg_vif();
631 if (!dev)
632 return -ENOBUFS;
5ae7b444
WC
633 err = dev_set_allmulti(dev, 1);
634 if (err) {
635 unregister_netdevice(dev);
7af3db78 636 dev_put(dev);
5ae7b444
WC
637 return err;
638 }
14fb64e1
YH
639 break;
640#endif
7bc570c8
YH
641 case 0:
642 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
643 if (!dev)
644 return -EADDRNOTAVAIL;
5ae7b444 645 err = dev_set_allmulti(dev, 1);
7af3db78
WC
646 if (err) {
647 dev_put(dev);
5ae7b444 648 return err;
7af3db78 649 }
7bc570c8
YH
650 break;
651 default:
652 return -EINVAL;
653 }
654
7bc570c8
YH
655 /*
656 * Fill in the VIF structures
657 */
658 v->rate_limit = vifc->vifc_rate_limit;
659 v->flags = vifc->mif6c_flags;
660 if (!mrtsock)
661 v->flags |= VIFF_STATIC;
662 v->threshold = vifc->vifc_threshold;
663 v->bytes_in = 0;
664 v->bytes_out = 0;
665 v->pkt_in = 0;
666 v->pkt_out = 0;
667 v->link = dev->ifindex;
668 if (v->flags & MIFF_REGISTER)
669 v->link = dev->iflink;
670
671 /* And finish update writing critical data */
672 write_lock_bh(&mrt_lock);
7bc570c8 673 v->dev = dev;
14fb64e1
YH
674#ifdef CONFIG_IPV6_PIMSM_V2
675 if (v->flags & MIFF_REGISTER)
950d5704 676 init_net.ipv6.mroute_reg_vif_num = vifi;
14fb64e1 677#endif
4e16880c
BT
678 if (vifi + 1 > init_net.ipv6.maxvif)
679 init_net.ipv6.maxvif = vifi + 1;
7bc570c8
YH
680 write_unlock_bh(&mrt_lock);
681 return 0;
682}
683
684static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
685{
686 int line = MFC6_HASH(mcastgrp, origin);
687 struct mfc6_cache *c;
688
4a6258a0 689 for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
7bc570c8
YH
690 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
691 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
692 break;
693 }
694 return c;
695}
696
697/*
698 * Allocate a multicast cache entry
699 */
58701ad4 700static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
7bc570c8 701{
36cbac59 702 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
7bc570c8
YH
703 if (c == NULL)
704 return NULL;
6ac7eb08 705 c->mfc_un.res.minvif = MAXMIFS;
58701ad4 706 mfc6_net_set(c, net);
7bc570c8
YH
707 return c;
708}
709
58701ad4 710static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
7bc570c8 711{
36cbac59 712 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
7bc570c8
YH
713 if (c == NULL)
714 return NULL;
7bc570c8
YH
715 skb_queue_head_init(&c->mfc_un.unres.unresolved);
716 c->mfc_un.unres.expires = jiffies + 10 * HZ;
58701ad4 717 mfc6_net_set(c, net);
7bc570c8
YH
718 return c;
719}
720
721/*
722 * A cache entry has gone into a resolved state from queued
723 */
724
725static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
726{
727 struct sk_buff *skb;
728
729 /*
730 * Play the pending entries through our router
731 */
732
733 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
734 if (ipv6_hdr(skb)->version == 0) {
735 int err;
736 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
737
738 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
549e028d 739 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
7bc570c8
YH
740 } else {
741 nlh->nlmsg_type = NLMSG_ERROR;
742 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
743 skb_trim(skb, nlh->nlmsg_len);
744 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
745 }
746 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
747 } else
748 ip6_mr_forward(skb, c);
749 }
750}
751
752/*
753 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
754 * expects the following bizarre scheme.
755 *
756 * Called under mrt_lock.
757 */
758
6ac7eb08 759static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
7bc570c8
YH
760{
761 struct sk_buff *skb;
762 struct mrt6msg *msg;
763 int ret;
764
14fb64e1
YH
765#ifdef CONFIG_IPV6_PIMSM_V2
766 if (assert == MRT6MSG_WHOLEPKT)
767 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
768 +sizeof(*msg));
769 else
770#endif
771 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
7bc570c8
YH
772
773 if (!skb)
774 return -ENOBUFS;
775
776 /* I suppose that internal messages
777 * do not require checksums */
778
779 skb->ip_summed = CHECKSUM_UNNECESSARY;
780
14fb64e1
YH
781#ifdef CONFIG_IPV6_PIMSM_V2
782 if (assert == MRT6MSG_WHOLEPKT) {
783 /* Ugly, but we have no choice with this interface.
784 Duplicate old header, fix length etc.
785 And all this only to mangle msg->im6_msgtype and
786 to set msg->im6_mbz to "mbz" :-)
787 */
788 skb_push(skb, -skb_network_offset(pkt));
789
790 skb_push(skb, sizeof(*msg));
791 skb_reset_transport_header(skb);
792 msg = (struct mrt6msg *)skb_transport_header(skb);
793 msg->im6_mbz = 0;
794 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
950d5704 795 msg->im6_mif = init_net.ipv6.mroute_reg_vif_num;
14fb64e1
YH
796 msg->im6_pad = 0;
797 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
798 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
799
800 skb->ip_summed = CHECKSUM_UNNECESSARY;
801 } else
802#endif
803 {
7bc570c8
YH
804 /*
805 * Copy the IP header
806 */
807
808 skb_put(skb, sizeof(struct ipv6hdr));
809 skb_reset_network_header(skb);
810 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
811
812 /*
813 * Add our header
814 */
815 skb_put(skb, sizeof(*msg));
816 skb_reset_transport_header(skb);
817 msg = (struct mrt6msg *)skb_transport_header(skb);
818
819 msg->im6_mbz = 0;
820 msg->im6_msgtype = assert;
6ac7eb08 821 msg->im6_mif = mifi;
7bc570c8
YH
822 msg->im6_pad = 0;
823 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
824 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
825
826 skb->dst = dst_clone(pkt->dst);
827 skb->ip_summed = CHECKSUM_UNNECESSARY;
828
829 skb_pull(skb, sizeof(struct ipv6hdr));
14fb64e1 830 }
7bc570c8 831
bd91b8bf 832 if (init_net.ipv6.mroute6_sk == NULL) {
7bc570c8
YH
833 kfree_skb(skb);
834 return -EINVAL;
835 }
836
837 /*
838 * Deliver to user space multicast routing algorithms
839 */
bd91b8bf
BT
840 ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
841 if (ret < 0) {
7bc570c8
YH
842 if (net_ratelimit())
843 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
844 kfree_skb(skb);
845 }
846
847 return ret;
848}
849
850/*
851 * Queue a packet for resolution. It gets locked cache entry!
852 */
853
854static int
6ac7eb08 855ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
7bc570c8
YH
856{
857 int err;
858 struct mfc6_cache *c;
859
860 spin_lock_bh(&mfc_unres_lock);
861 for (c = mfc_unres_queue; c; c = c->next) {
4045e57c
BT
862 if (net_eq(mfc6_net(c), &init_net) &&
863 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
7bc570c8
YH
864 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
865 break;
866 }
867
868 if (c == NULL) {
869 /*
870 * Create a new entry if allowable
871 */
872
4045e57c 873 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
58701ad4 874 (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
7bc570c8
YH
875 spin_unlock_bh(&mfc_unres_lock);
876
877 kfree_skb(skb);
878 return -ENOBUFS;
879 }
880
881 /*
882 * Fill in the new cache entry
883 */
884 c->mf6c_parent = -1;
885 c->mf6c_origin = ipv6_hdr(skb)->saddr;
886 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
887
888 /*
889 * Reflect first query at pim6sd
890 */
6ac7eb08 891 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
7bc570c8
YH
892 /* If the report failed throw the cache entry
893 out - Brad Parker
894 */
895 spin_unlock_bh(&mfc_unres_lock);
896
58701ad4 897 ip6mr_cache_free(c);
7bc570c8
YH
898 kfree_skb(skb);
899 return err;
900 }
901
4045e57c 902 atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
903 c->next = mfc_unres_queue;
904 mfc_unres_queue = c;
905
906 ipmr_do_expire_process(1);
907 }
908
909 /*
910 * See if we can append the packet
911 */
912 if (c->mfc_un.unres.unresolved.qlen > 3) {
913 kfree_skb(skb);
914 err = -ENOBUFS;
915 } else {
916 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
917 err = 0;
918 }
919
920 spin_unlock_bh(&mfc_unres_lock);
921 return err;
922}
923
924/*
925 * MFC6 cache manipulation by user space
926 */
927
928static int ip6mr_mfc_delete(struct mf6cctl *mfc)
929{
930 int line;
931 struct mfc6_cache *c, **cp;
932
933 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
934
4a6258a0
BT
935 for (cp = &init_net.ipv6.mfc6_cache_array[line];
936 (c = *cp) != NULL; cp = &c->next) {
7bc570c8
YH
937 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
938 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
939 write_lock_bh(&mrt_lock);
940 *cp = c->next;
941 write_unlock_bh(&mrt_lock);
942
58701ad4 943 ip6mr_cache_free(c);
7bc570c8
YH
944 return 0;
945 }
946 }
947 return -ENOENT;
948}
949
950static int ip6mr_device_event(struct notifier_block *this,
951 unsigned long event, void *ptr)
952{
953 struct net_device *dev = ptr;
954 struct mif_device *v;
955 int ct;
956
721499e8 957 if (!net_eq(dev_net(dev), &init_net))
7bc570c8
YH
958 return NOTIFY_DONE;
959
960 if (event != NETDEV_UNREGISTER)
961 return NOTIFY_DONE;
962
4e16880c
BT
963 v = &init_net.ipv6.vif6_table[0];
964 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
7bc570c8
YH
965 if (v->dev == dev)
966 mif6_delete(ct);
967 }
968 return NOTIFY_DONE;
969}
970
971static struct notifier_block ip6_mr_notifier = {
972 .notifier_call = ip6mr_device_event
973};
974
975/*
976 * Setup for IP multicast routing
977 */
978
4e16880c
BT
979static int __net_init ip6mr_net_init(struct net *net)
980{
981 int err = 0;
4e16880c
BT
982 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
983 GFP_KERNEL);
984 if (!net->ipv6.vif6_table) {
985 err = -ENOMEM;
986 goto fail;
987 }
4a6258a0
BT
988
989 /* Forwarding cache */
990 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
991 sizeof(struct mfc6_cache *),
992 GFP_KERNEL);
993 if (!net->ipv6.mfc6_cache_array) {
994 err = -ENOMEM;
995 goto fail_mfc6_cache;
996 }
950d5704
BT
997
998#ifdef CONFIG_IPV6_PIMSM_V2
999 net->ipv6.mroute_reg_vif_num = -1;
1000#endif
8b90fc7e
BT
1001
1002#ifdef CONFIG_PROC_FS
1003 err = -ENOMEM;
1004 if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1005 goto proc_vif_fail;
1006 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1007 goto proc_cache_fail;
1008#endif
4a6258a0
BT
1009 return 0;
1010
8b90fc7e
BT
1011#ifdef CONFIG_PROC_FS
1012proc_cache_fail:
1013 proc_net_remove(net, "ip6_mr_vif");
1014proc_vif_fail:
1015 kfree(net->ipv6.mfc6_cache_array);
1016#endif
4a6258a0
BT
1017fail_mfc6_cache:
1018 kfree(net->ipv6.vif6_table);
4e16880c
BT
1019fail:
1020 return err;
1021}
1022
1023static void __net_exit ip6mr_net_exit(struct net *net)
1024{
8b90fc7e
BT
1025#ifdef CONFIG_PROC_FS
1026 proc_net_remove(net, "ip6_mr_cache");
1027 proc_net_remove(net, "ip6_mr_vif");
1028#endif
4a6258a0 1029 kfree(net->ipv6.mfc6_cache_array);
4e16880c
BT
1030 kfree(net->ipv6.vif6_table);
1031}
1032
1033static struct pernet_operations ip6mr_net_ops = {
1034 .init = ip6mr_net_init,
1035 .exit = ip6mr_net_exit,
1036};
1037
623d1a1a 1038int __init ip6_mr_init(void)
7bc570c8 1039{
623d1a1a
WC
1040 int err;
1041
7bc570c8
YH
1042 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1043 sizeof(struct mfc6_cache),
1044 0, SLAB_HWCACHE_ALIGN,
1045 NULL);
1046 if (!mrt_cachep)
623d1a1a 1047 return -ENOMEM;
7bc570c8 1048
4e16880c
BT
1049 err = register_pernet_subsys(&ip6mr_net_ops);
1050 if (err)
1051 goto reg_pernet_fail;
1052
7bc570c8 1053 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
623d1a1a
WC
1054 err = register_netdevice_notifier(&ip6_mr_notifier);
1055 if (err)
1056 goto reg_notif_fail;
623d1a1a 1057 return 0;
87b30a65
BT
1058reg_notif_fail:
1059 del_timer(&ipmr_expire_timer);
4e16880c
BT
1060 unregister_pernet_subsys(&ip6mr_net_ops);
1061reg_pernet_fail:
87b30a65 1062 kmem_cache_destroy(mrt_cachep);
623d1a1a 1063 return err;
7bc570c8
YH
1064}
1065
623d1a1a
WC
1066void ip6_mr_cleanup(void)
1067{
623d1a1a
WC
1068 unregister_netdevice_notifier(&ip6_mr_notifier);
1069 del_timer(&ipmr_expire_timer);
4e16880c 1070 unregister_pernet_subsys(&ip6mr_net_ops);
623d1a1a
WC
1071 kmem_cache_destroy(mrt_cachep);
1072}
7bc570c8
YH
1073
/*
 * Install or update a multicast forwarding cache (MFC) entry from a
 * MRT6_ADD_MFC setsockopt request.
 *
 * @mfc:     user-supplied (origin, group, parent, oif-set) tuple,
 *           already copied into kernel space by the caller.
 * @mrtsock: non-zero when the request came from the registered mroute6
 *           socket; otherwise the entry is marked MFC_STATIC so that
 *           mroute_clean_tables() will not wipe it on socket close.
 *
 * Returns 0 on success, -EINVAL for a non-multicast group address,
 * -ENOMEM if a new cache entry cannot be allocated.
 */
static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	/*
	 * Build the per-mif TTL threshold array: mifs present in the
	 * request's ifset forward at threshold 1, all others get 255.
	 * Since ipv6 hop_limit is a u8 (max 255) and the forward path
	 * requires hop_limit > ttls[mif], a 255 entry never forwards.
	 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	/* Look for an existing (origin, group) entry in this hash bucket. */
	for (cp = &init_net.ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	/* Entry already resolved: update parent/thresholds in place. */
	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	/* Publish the new entry at the head of its bucket. */
	write_lock_bh(&mrt_lock);
	c->next = init_net.ipv6.mfc6_cache_array[line];
	init_net.ipv6.mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 * Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc6_net(uc), &init_net) &&
		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
			break;
		}
	}
	/* Queue drained: the expiry timer has nothing left to do. */
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	/* Flush the packets queued on the unresolved entry, then free it. */
	if (uc) {
		ip6mr_cache_resolve(uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}
1151
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

/*
 * Tear down routing state when the mroute6 control socket goes away:
 * delete every non-static vif, unlink and free every non-static MFC
 * entry, and destroy any unresolved entries belonging to this netns.
 * Entries flagged MFC_STATIC / VIFF_STATIC (added by a privileged
 * non-mrouter caller) are deliberately preserved.
 * Caller holds RTNL (see ip6mr_sk_done()); mrt_lock is taken here only
 * around list unlinks visible to the read-side forwarding path.
 */
static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv6.maxvif; i++) {
		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		struct mfc6_cache *c, **cp;

		cp = &init_net.ipv6.mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* Unlink under the writer lock, free outside it. */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			/* The unresolved queue is global; skip other netns. */
			if (!net_eq(mfc6_net(c), &init_net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;
			ip6mr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1204
1205static int ip6mr_sk_init(struct sock *sk)
1206{
1207 int err = 0;
1208
1209 rtnl_lock();
1210 write_lock_bh(&mrt_lock);
bd91b8bf
BT
1211 if (likely(init_net.ipv6.mroute6_sk == NULL))
1212 init_net.ipv6.mroute6_sk = sk;
7bc570c8
YH
1213 else
1214 err = -EADDRINUSE;
1215 write_unlock_bh(&mrt_lock);
1216
1217 rtnl_unlock();
1218
1219 return err;
1220}
1221
/*
 * Unregister the mroute6 control socket (MRT6_DONE or socket close).
 * Returns -EACCES if @sk is not the currently registered socket.
 * The pointer is cleared under mrt_lock before the tables are cleaned,
 * so the forwarding path stops reporting to this socket first.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == init_net.ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		init_net.ipv6.mroute6_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}
1239
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

/*
 * MRT6_* setsockopt handler. Except for MRT6_INIT, the caller must be
 * either the registered mroute6 socket or hold CAP_NET_ADMIN.
 * Table-mutating options (ADD/DEL MIF/MFC, MRT6_PIM) run under RTNL.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		/* Only a raw ICMPv6 socket may become the mrouter socket. */
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			/* mrtsock flag decides whether the entry is MFC_STATIC. */
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		/* NOTE(review): optlen is not validated here before
		 * get_user(), unlike the cases above — confirm intended. */
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		init_net.ipv6.mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Toggling PIM also toggles assert, and (un)registers the
		 * IPPROTO_PIM input handler. */
		if (v != init_net.ipv6.mroute_do_pim) {
			init_net.ipv6.mroute_do_pim = v;
			init_net.ipv6.mroute_do_assert = v;
			if (init_net.ipv6.mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1358
/*
 *	Getsock opt support for the multicast routing system.
 */

/*
 * MRT6_* getsockopt handler. Supports MRT6_VERSION (hard-coded 0x0305),
 * MRT6_PIM (when PIMSM v2 is configured) and MRT6_ASSERT. The reply is
 * an int, truncated to the user-supplied buffer length.
 */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = init_net.ipv6.mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = init_net.ipv6.mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Clamp to sizeof(int); a negative user length is rejected. */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
1398
/*
 *	The IP multicast ioctl support routines.
 */

/*
 * Statistics ioctls for userspace mrouters:
 *   SIOCGETMIFCNT_IN6 - per-mif packet/byte counters
 *   SIOCGETSGCNT_IN6  - per-(S,G) cache entry counters
 * Both copy a request struct in, fill it under mrt_lock (read side),
 * and copy it back. -EADDRNOTAVAIL means mif/cache entry not found.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= init_net.ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(&init_net, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1453
1454
/*
 * Netfilter NF_INET_FORWARD continuation for ip6mr_forward2():
 * bump the forwarded-datagram counter and hand the skb to the
 * dst output path.
 */
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}
1461
/*
 *	Processing handlers for ip6mr_forward
 */

/*
 * Transmit one copy of @skb out of mif @vifi according to cache entry
 * @c. Consumes the skb on all paths (transmits or frees it).
 * For a PIM register mif the packet is not transmitted but reported to
 * the mrouter daemon as a MRT6MSG_WHOLEPKT message instead.
 */
static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		/* Register mif: account, report the whole packet to the
		 * daemon, and drop the local copy. */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route the copy out of the mif's link. */
	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1538
1539static int ip6mr_find_vif(struct net_device *dev)
1540{
1541 int ct;
4e16880c
BT
1542 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1543 if (init_net.ipv6.vif6_table[ct].dev == dev)
7bc570c8
YH
1544 break;
1545 }
1546 return ct;
1547}
1548
/*
 * Forward a multicast packet according to resolved cache entry @cache.
 * Verifies the packet arrived on the expected parent mif (optionally
 * raising a PIM assert on mismatch), then sends one copy per outgoing
 * mif whose TTL threshold the packet's hop limit exceeds.
 * Consumes @skb. Called with mrt_lock held for reading (see
 * ip6_mr_input()).
 */
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (init_net.ipv6.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    /* Rate-limit asserts to one per MFC_ASSERT_THRESH. */
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	init_net.ipv6.vif6_table[vif].pkt_in++;
	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	/* Clone for every eligible oif except the last, which gets the
	 * original skb — avoids one needless clone. */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
1608
1609
/*
 *	Multicast packets for forwarding arrive here
 */

/*
 * Entry point for multicast packets needing forwarding. Looks up the
 * (src, dst) pair in the MFC; on a miss the packet is queued on the
 * unresolved cache (which reports to the mrouter daemon) via
 * ip6mr_cache_unresolved(), on a hit it is forwarded. All lookups and
 * the forward run under the mrt_lock read side.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		/* Packet arrived on a device that is not a mif: drop. */
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
1645
1646
/*
 * Append multicast route attributes for cache entry @c to an rtnetlink
 * message being built in @skb: RTA_IIF (parent mif's ifindex, when it
 * has a device) and an RTA_MULTIPATH nest with one rtnexthop per
 * outgoing mif whose TTL threshold is active (< 255). Sets rtm_type to
 * RTN_MULTICAST. Returns 1 on success, -EMSGSIZE (after trimming the
 * partial data) if the skb runs out of tailroom.
 * Caller holds mrt_lock for reading (see ip6mr_get_route()).
 */
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			/* NOTE(review): vif6_table[ct].dev is dereferenced
			 * without a NULL check here, unlike the parent dev
			 * above — confirm a mif with an active ttl always
			 * has a device while mrt_lock is held. */
			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	/* Backfill the nest header now that its payload length is known. */
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
1681
/*
 * RTM_GETROUTE support for multicast routes. Looks up the (src, dst)
 * of the rt6_info attached to @skb in the MFC and fills @rtm/@skb via
 * ip6mr_fill_mroute(). If no cache entry exists and @nowait is zero, a
 * minimal skb carrying only a synthetic IPv6 header (just the two
 * addresses; version 0, no payload) is queued on the unresolved cache
 * so the mrouter daemon can resolve it.
 * Returns ip6mr_fill_mroute()'s result, -EAGAIN (nowait miss),
 * -ENODEV (input device is not a mif) or -ENOMEM.
 */
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Fabricate a header that carries only src/dst; all other
		 * fields are zeroed (version 0 marks it as synthetic). */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1745