]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/ip6mr.c
packet: Avoid lock_sock in mmap handler
[net-next-2.6.git] / net / ipv6 / ip6mr.c
CommitLineData
7bc570c8
YH
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
7bc570c8
YH
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
7bc570c8
YH
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
7bc570c8 35#include <linux/init.h>
7bc570c8
YH
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
7bc570c8 39#include <net/raw.h>
7bc570c8
YH
40#include <linux/notifier.h>
41#include <linux/if_arp.h>
7bc570c8
YH
42#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
14fb64e1 48#include <linux/pim.h>
7bc570c8
YH
49#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
7bc570c8
YH
52/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
4e16880c 62#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
7bc570c8 63
7bc570c8 64static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
7bc570c8
YH
65
66/* Special spinlock for queue of unresolved entries */
67static DEFINE_SPINLOCK(mfc_unres_lock);
68
69/* We return to original Alan's scheme. Hash table of resolved
70 entries is changed only in process context and protected
71 with weak lock mrt_lock. Queue of unresolved entries is protected
72 with strong spinlock mfc_unres_lock.
73
74 In this case data path is free of exclusive locks at all.
75 */
76
77static struct kmem_cache *mrt_cachep __read_mostly;
78
79static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
8229efda
BT
80static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
81 mifi_t mifi, int assert);
7bc570c8 82static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
8229efda 83static void mroute_clean_tables(struct net *net);
7bc570c8 84
14fb64e1
YH
85#ifdef CONFIG_IPV6_PIMSM_V2
86static struct inet6_protocol pim6_protocol;
87#endif
88
7bc570c8
YH
89static struct timer_list ipmr_expire_timer;
90
91
92#ifdef CONFIG_PROC_FS
93
94struct ipmr_mfc_iter {
8b90fc7e 95 struct seq_net_private p;
7bc570c8
YH
96 struct mfc6_cache **cache;
97 int ct;
98};
99
100
8b90fc7e
BT
101static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
102 struct ipmr_mfc_iter *it, loff_t pos)
7bc570c8
YH
103{
104 struct mfc6_cache *mfc;
105
8b90fc7e 106 it->cache = net->ipv6.mfc6_cache_array;
7bc570c8 107 read_lock(&mrt_lock);
4a6258a0 108 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
8b90fc7e 109 for (mfc = net->ipv6.mfc6_cache_array[it->ct];
4a6258a0 110 mfc; mfc = mfc->next)
7bc570c8
YH
111 if (pos-- == 0)
112 return mfc;
113 read_unlock(&mrt_lock);
114
115 it->cache = &mfc_unres_queue;
116 spin_lock_bh(&mfc_unres_lock);
117 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
8b90fc7e
BT
118 if (net_eq(mfc6_net(mfc), net) &&
119 pos-- == 0)
7bc570c8
YH
120 return mfc;
121 spin_unlock_bh(&mfc_unres_lock);
122
123 it->cache = NULL;
124 return NULL;
125}
126
127
128
129
130/*
131 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
132 */
133
134struct ipmr_vif_iter {
8b90fc7e 135 struct seq_net_private p;
7bc570c8
YH
136 int ct;
137};
138
8b90fc7e
BT
139static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
140 struct ipmr_vif_iter *iter,
7bc570c8
YH
141 loff_t pos)
142{
8b90fc7e
BT
143 for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
144 if (!MIF_EXISTS(net, iter->ct))
7bc570c8
YH
145 continue;
146 if (pos-- == 0)
8b90fc7e 147 return &net->ipv6.vif6_table[iter->ct];
7bc570c8
YH
148 }
149 return NULL;
150}
151
152static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
153 __acquires(mrt_lock)
154{
8b90fc7e
BT
155 struct net *net = seq_file_net(seq);
156
7bc570c8 157 read_lock(&mrt_lock);
8b90fc7e
BT
158 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
159 : SEQ_START_TOKEN;
7bc570c8
YH
160}
161
162static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
163{
164 struct ipmr_vif_iter *iter = seq->private;
8b90fc7e 165 struct net *net = seq_file_net(seq);
7bc570c8
YH
166
167 ++*pos;
168 if (v == SEQ_START_TOKEN)
8b90fc7e 169 return ip6mr_vif_seq_idx(net, iter, 0);
7bc570c8 170
8b90fc7e
BT
171 while (++iter->ct < net->ipv6.maxvif) {
172 if (!MIF_EXISTS(net, iter->ct))
7bc570c8 173 continue;
8b90fc7e 174 return &net->ipv6.vif6_table[iter->ct];
7bc570c8
YH
175 }
176 return NULL;
177}
178
179static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
180 __releases(mrt_lock)
181{
182 read_unlock(&mrt_lock);
183}
184
185static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
186{
8b90fc7e
BT
187 struct net *net = seq_file_net(seq);
188
7bc570c8
YH
189 if (v == SEQ_START_TOKEN) {
190 seq_puts(seq,
191 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
192 } else {
193 const struct mif_device *vif = v;
194 const char *name = vif->dev ? vif->dev->name : "none";
195
196 seq_printf(seq,
d430a227 197 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
8b90fc7e 198 vif - net->ipv6.vif6_table,
7bc570c8
YH
199 name, vif->bytes_in, vif->pkt_in,
200 vif->bytes_out, vif->pkt_out,
201 vif->flags);
202 }
203 return 0;
204}
205
206static struct seq_operations ip6mr_vif_seq_ops = {
207 .start = ip6mr_vif_seq_start,
208 .next = ip6mr_vif_seq_next,
209 .stop = ip6mr_vif_seq_stop,
210 .show = ip6mr_vif_seq_show,
211};
212
213static int ip6mr_vif_open(struct inode *inode, struct file *file)
214{
8b90fc7e
BT
215 return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
216 sizeof(struct ipmr_vif_iter));
7bc570c8
YH
217}
218
219static struct file_operations ip6mr_vif_fops = {
220 .owner = THIS_MODULE,
221 .open = ip6mr_vif_open,
222 .read = seq_read,
223 .llseek = seq_lseek,
8b90fc7e 224 .release = seq_release_net,
7bc570c8
YH
225};
226
227static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
228{
8b90fc7e
BT
229 struct net *net = seq_file_net(seq);
230
231 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
232 : SEQ_START_TOKEN;
7bc570c8
YH
233}
234
235static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
236{
237 struct mfc6_cache *mfc = v;
238 struct ipmr_mfc_iter *it = seq->private;
8b90fc7e 239 struct net *net = seq_file_net(seq);
7bc570c8
YH
240
241 ++*pos;
242
243 if (v == SEQ_START_TOKEN)
8b90fc7e 244 return ipmr_mfc_seq_idx(net, seq->private, 0);
7bc570c8
YH
245
246 if (mfc->next)
247 return mfc->next;
248
249 if (it->cache == &mfc_unres_queue)
250 goto end_of_list;
251
8b90fc7e 252 BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
7bc570c8 253
4a6258a0 254 while (++it->ct < MFC6_LINES) {
8b90fc7e 255 mfc = net->ipv6.mfc6_cache_array[it->ct];
7bc570c8
YH
256 if (mfc)
257 return mfc;
258 }
259
260 /* exhausted cache_array, show unresolved */
261 read_unlock(&mrt_lock);
262 it->cache = &mfc_unres_queue;
263 it->ct = 0;
264
265 spin_lock_bh(&mfc_unres_lock);
266 mfc = mfc_unres_queue;
267 if (mfc)
268 return mfc;
269
270 end_of_list:
271 spin_unlock_bh(&mfc_unres_lock);
272 it->cache = NULL;
273
274 return NULL;
275}
276
277static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
278{
279 struct ipmr_mfc_iter *it = seq->private;
8b90fc7e 280 struct net *net = seq_file_net(seq);
7bc570c8
YH
281
282 if (it->cache == &mfc_unres_queue)
283 spin_unlock_bh(&mfc_unres_lock);
8b90fc7e 284 else if (it->cache == net->ipv6.mfc6_cache_array)
7bc570c8
YH
285 read_unlock(&mrt_lock);
286}
287
288static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
289{
290 int n;
8b90fc7e 291 struct net *net = seq_file_net(seq);
7bc570c8
YH
292
293 if (v == SEQ_START_TOKEN) {
294 seq_puts(seq,
295 "Group "
296 "Origin "
297 "Iif Pkts Bytes Wrong Oifs\n");
298 } else {
299 const struct mfc6_cache *mfc = v;
300 const struct ipmr_mfc_iter *it = seq->private;
301
999890b2 302 seq_printf(seq, "%pI6 %pI6 %-3hd",
0c6ce78a 303 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
1ea472e2 304 mfc->mf6c_parent);
7bc570c8
YH
305
306 if (it->cache != &mfc_unres_queue) {
1ea472e2
BT
307 seq_printf(seq, " %8lu %8lu %8lu",
308 mfc->mfc_un.res.pkt,
309 mfc->mfc_un.res.bytes,
310 mfc->mfc_un.res.wrong_if);
7bc570c8
YH
311 for (n = mfc->mfc_un.res.minvif;
312 n < mfc->mfc_un.res.maxvif; n++) {
8b90fc7e 313 if (MIF_EXISTS(net, n) &&
7bc570c8
YH
314 mfc->mfc_un.res.ttls[n] < 255)
315 seq_printf(seq,
316 " %2d:%-3d",
317 n, mfc->mfc_un.res.ttls[n]);
318 }
1ea472e2
BT
319 } else {
320 /* unresolved mfc_caches don't contain
321 * pkt, bytes and wrong_if values
322 */
323 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
7bc570c8
YH
324 }
325 seq_putc(seq, '\n');
326 }
327 return 0;
328}
329
330static struct seq_operations ipmr_mfc_seq_ops = {
331 .start = ipmr_mfc_seq_start,
332 .next = ipmr_mfc_seq_next,
333 .stop = ipmr_mfc_seq_stop,
334 .show = ipmr_mfc_seq_show,
335};
336
337static int ipmr_mfc_open(struct inode *inode, struct file *file)
338{
8b90fc7e
BT
339 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
340 sizeof(struct ipmr_mfc_iter));
7bc570c8
YH
341}
342
343static struct file_operations ip6mr_mfc_fops = {
344 .owner = THIS_MODULE,
345 .open = ipmr_mfc_open,
346 .read = seq_read,
347 .llseek = seq_lseek,
8b90fc7e 348 .release = seq_release_net,
7bc570c8
YH
349};
350#endif
351
14fb64e1 352#ifdef CONFIG_IPV6_PIMSM_V2
14fb64e1
YH
353
354static int pim6_rcv(struct sk_buff *skb)
355{
356 struct pimreghdr *pim;
357 struct ipv6hdr *encap;
358 struct net_device *reg_dev = NULL;
8229efda
BT
359 struct net *net = dev_net(skb->dev);
360 int reg_vif_num = net->ipv6.mroute_reg_vif_num;
14fb64e1
YH
361
362 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
363 goto drop;
364
365 pim = (struct pimreghdr *)skb_transport_header(skb);
366 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
367 (pim->flags & PIM_NULL_REGISTER) ||
1d6e55f1
TG
368 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
369 sizeof(*pim), IPPROTO_PIM,
370 csum_partial((void *)pim, sizeof(*pim), 0)) &&
ec6b486f 371 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
14fb64e1
YH
372 goto drop;
373
374 /* check if the inner packet is destined to mcast group */
375 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
376 sizeof(*pim));
377
378 if (!ipv6_addr_is_multicast(&encap->daddr) ||
379 encap->payload_len == 0 ||
380 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
381 goto drop;
382
383 read_lock(&mrt_lock);
384 if (reg_vif_num >= 0)
8229efda 385 reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
14fb64e1
YH
386 if (reg_dev)
387 dev_hold(reg_dev);
388 read_unlock(&mrt_lock);
389
390 if (reg_dev == NULL)
391 goto drop;
392
393 skb->mac_header = skb->network_header;
394 skb_pull(skb, (u8 *)encap - skb->data);
395 skb_reset_network_header(skb);
396 skb->dev = reg_dev;
1d6e55f1 397 skb->protocol = htons(ETH_P_IPV6);
14fb64e1
YH
398 skb->ip_summed = 0;
399 skb->pkt_type = PACKET_HOST;
400 dst_release(skb->dst);
dc58c78c
PE
401 reg_dev->stats.rx_bytes += skb->len;
402 reg_dev->stats.rx_packets++;
14fb64e1
YH
403 skb->dst = NULL;
404 nf_reset(skb);
405 netif_rx(skb);
406 dev_put(reg_dev);
407 return 0;
408 drop:
409 kfree_skb(skb);
410 return 0;
411}
412
413static struct inet6_protocol pim6_protocol = {
414 .handler = pim6_rcv,
415};
416
417/* Service routines creating virtual interfaces: PIMREG */
418
419static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
420{
8229efda
BT
421 struct net *net = dev_net(dev);
422
14fb64e1 423 read_lock(&mrt_lock);
dc58c78c
PE
424 dev->stats.tx_bytes += skb->len;
425 dev->stats.tx_packets++;
8229efda
BT
426 ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
427 MRT6MSG_WHOLEPKT);
14fb64e1
YH
428 read_unlock(&mrt_lock);
429 kfree_skb(skb);
430 return 0;
431}
432
007c3838
SH
433static const struct net_device_ops reg_vif_netdev_ops = {
434 .ndo_start_xmit = reg_vif_xmit,
435};
436
14fb64e1
YH
437static void reg_vif_setup(struct net_device *dev)
438{
439 dev->type = ARPHRD_PIMREG;
440 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
441 dev->flags = IFF_NOARP;
007c3838 442 dev->netdev_ops = &reg_vif_netdev_ops;
14fb64e1
YH
443 dev->destructor = free_netdev;
444}
445
8229efda 446static struct net_device *ip6mr_reg_vif(struct net *net)
14fb64e1
YH
447{
448 struct net_device *dev;
14fb64e1 449
dc58c78c 450 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
14fb64e1
YH
451 if (dev == NULL)
452 return NULL;
453
8229efda
BT
454 dev_net_set(dev, net);
455
14fb64e1
YH
456 if (register_netdevice(dev)) {
457 free_netdev(dev);
458 return NULL;
459 }
460 dev->iflink = 0;
461
14fb64e1
YH
462 if (dev_open(dev))
463 goto failure;
464
7af3db78 465 dev_hold(dev);
14fb64e1
YH
466 return dev;
467
468failure:
469 /* allow the register to be completed before unregistering. */
470 rtnl_unlock();
471 rtnl_lock();
472
473 unregister_netdevice(dev);
474 return NULL;
475}
476#endif
477
7bc570c8
YH
478/*
479 * Delete a VIF entry
480 */
481
8229efda 482static int mif6_delete(struct net *net, int vifi)
7bc570c8
YH
483{
484 struct mif_device *v;
485 struct net_device *dev;
1d6e55f1 486 struct inet6_dev *in6_dev;
8229efda 487 if (vifi < 0 || vifi >= net->ipv6.maxvif)
7bc570c8
YH
488 return -EADDRNOTAVAIL;
489
8229efda 490 v = &net->ipv6.vif6_table[vifi];
7bc570c8
YH
491
492 write_lock_bh(&mrt_lock);
493 dev = v->dev;
494 v->dev = NULL;
495
496 if (!dev) {
497 write_unlock_bh(&mrt_lock);
498 return -EADDRNOTAVAIL;
499 }
500
14fb64e1 501#ifdef CONFIG_IPV6_PIMSM_V2
8229efda
BT
502 if (vifi == net->ipv6.mroute_reg_vif_num)
503 net->ipv6.mroute_reg_vif_num = -1;
14fb64e1
YH
504#endif
505
8229efda 506 if (vifi + 1 == net->ipv6.maxvif) {
7bc570c8
YH
507 int tmp;
508 for (tmp = vifi - 1; tmp >= 0; tmp--) {
8229efda 509 if (MIF_EXISTS(net, tmp))
7bc570c8
YH
510 break;
511 }
8229efda 512 net->ipv6.maxvif = tmp + 1;
7bc570c8
YH
513 }
514
515 write_unlock_bh(&mrt_lock);
516
517 dev_set_allmulti(dev, -1);
518
1d6e55f1
TG
519 in6_dev = __in6_dev_get(dev);
520 if (in6_dev)
521 in6_dev->cnf.mc_forwarding--;
522
7bc570c8
YH
523 if (v->flags & MIFF_REGISTER)
524 unregister_netdevice(dev);
525
526 dev_put(dev);
527 return 0;
528}
529
58701ad4
BT
530static inline void ip6mr_cache_free(struct mfc6_cache *c)
531{
532 release_net(mfc6_net(c));
533 kmem_cache_free(mrt_cachep, c);
534}
535
7bc570c8
YH
536/* Destroy an unresolved cache entry, killing queued skbs
537 and reporting error to netlink readers.
538 */
539
540static void ip6mr_destroy_unres(struct mfc6_cache *c)
541{
542 struct sk_buff *skb;
8229efda 543 struct net *net = mfc6_net(c);
7bc570c8 544
8229efda 545 atomic_dec(&net->ipv6.cache_resolve_queue_len);
7bc570c8
YH
546
547 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
548 if (ipv6_hdr(skb)->version == 0) {
549 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
550 nlh->nlmsg_type = NLMSG_ERROR;
551 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
552 skb_trim(skb, nlh->nlmsg_len);
553 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
8229efda 554 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
7bc570c8
YH
555 } else
556 kfree_skb(skb);
557 }
558
58701ad4 559 ip6mr_cache_free(c);
7bc570c8
YH
560}
561
562
563/* Single timer process for all the unresolved queue. */
564
565static void ipmr_do_expire_process(unsigned long dummy)
566{
567 unsigned long now = jiffies;
568 unsigned long expires = 10 * HZ;
569 struct mfc6_cache *c, **cp;
570
571 cp = &mfc_unres_queue;
572
573 while ((c = *cp) != NULL) {
574 if (time_after(c->mfc_un.unres.expires, now)) {
575 /* not yet... */
576 unsigned long interval = c->mfc_un.unres.expires - now;
577 if (interval < expires)
578 expires = interval;
579 cp = &c->next;
580 continue;
581 }
582
583 *cp = c->next;
584 ip6mr_destroy_unres(c);
585 }
586
4045e57c 587 if (mfc_unres_queue != NULL)
7bc570c8
YH
588 mod_timer(&ipmr_expire_timer, jiffies + expires);
589}
590
591static void ipmr_expire_process(unsigned long dummy)
592{
593 if (!spin_trylock(&mfc_unres_lock)) {
594 mod_timer(&ipmr_expire_timer, jiffies + 1);
595 return;
596 }
597
4045e57c 598 if (mfc_unres_queue != NULL)
7bc570c8
YH
599 ipmr_do_expire_process(dummy);
600
601 spin_unlock(&mfc_unres_lock);
602}
603
604/* Fill oifs list. It is called under write locked mrt_lock. */
605
606static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
607{
608 int vifi;
8229efda 609 struct net *net = mfc6_net(cache);
7bc570c8 610
6ac7eb08 611 cache->mfc_un.res.minvif = MAXMIFS;
7bc570c8 612 cache->mfc_un.res.maxvif = 0;
6ac7eb08 613 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
7bc570c8 614
8229efda
BT
615 for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
616 if (MIF_EXISTS(net, vifi) &&
4e16880c 617 ttls[vifi] && ttls[vifi] < 255) {
7bc570c8
YH
618 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
619 if (cache->mfc_un.res.minvif > vifi)
620 cache->mfc_un.res.minvif = vifi;
621 if (cache->mfc_un.res.maxvif <= vifi)
622 cache->mfc_un.res.maxvif = vifi + 1;
623 }
624 }
625}
626
8229efda 627static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
7bc570c8
YH
628{
629 int vifi = vifc->mif6c_mifi;
8229efda 630 struct mif_device *v = &net->ipv6.vif6_table[vifi];
7bc570c8 631 struct net_device *dev;
1d6e55f1 632 struct inet6_dev *in6_dev;
5ae7b444 633 int err;
7bc570c8
YH
634
635 /* Is vif busy ? */
8229efda 636 if (MIF_EXISTS(net, vifi))
7bc570c8
YH
637 return -EADDRINUSE;
638
639 switch (vifc->mif6c_flags) {
14fb64e1
YH
640#ifdef CONFIG_IPV6_PIMSM_V2
641 case MIFF_REGISTER:
642 /*
643 * Special Purpose VIF in PIM
644 * All the packets will be sent to the daemon
645 */
8229efda 646 if (net->ipv6.mroute_reg_vif_num >= 0)
14fb64e1 647 return -EADDRINUSE;
8229efda 648 dev = ip6mr_reg_vif(net);
14fb64e1
YH
649 if (!dev)
650 return -ENOBUFS;
5ae7b444
WC
651 err = dev_set_allmulti(dev, 1);
652 if (err) {
653 unregister_netdevice(dev);
7af3db78 654 dev_put(dev);
5ae7b444
WC
655 return err;
656 }
14fb64e1
YH
657 break;
658#endif
7bc570c8 659 case 0:
8229efda 660 dev = dev_get_by_index(net, vifc->mif6c_pifi);
7bc570c8
YH
661 if (!dev)
662 return -EADDRNOTAVAIL;
5ae7b444 663 err = dev_set_allmulti(dev, 1);
7af3db78
WC
664 if (err) {
665 dev_put(dev);
5ae7b444 666 return err;
7af3db78 667 }
7bc570c8
YH
668 break;
669 default:
670 return -EINVAL;
671 }
672
1d6e55f1
TG
673 in6_dev = __in6_dev_get(dev);
674 if (in6_dev)
675 in6_dev->cnf.mc_forwarding++;
676
7bc570c8
YH
677 /*
678 * Fill in the VIF structures
679 */
680 v->rate_limit = vifc->vifc_rate_limit;
681 v->flags = vifc->mif6c_flags;
682 if (!mrtsock)
683 v->flags |= VIFF_STATIC;
684 v->threshold = vifc->vifc_threshold;
685 v->bytes_in = 0;
686 v->bytes_out = 0;
687 v->pkt_in = 0;
688 v->pkt_out = 0;
689 v->link = dev->ifindex;
690 if (v->flags & MIFF_REGISTER)
691 v->link = dev->iflink;
692
693 /* And finish update writing critical data */
694 write_lock_bh(&mrt_lock);
7bc570c8 695 v->dev = dev;
14fb64e1
YH
696#ifdef CONFIG_IPV6_PIMSM_V2
697 if (v->flags & MIFF_REGISTER)
8229efda 698 net->ipv6.mroute_reg_vif_num = vifi;
14fb64e1 699#endif
8229efda
BT
700 if (vifi + 1 > net->ipv6.maxvif)
701 net->ipv6.maxvif = vifi + 1;
7bc570c8
YH
702 write_unlock_bh(&mrt_lock);
703 return 0;
704}
705
8229efda
BT
706static struct mfc6_cache *ip6mr_cache_find(struct net *net,
707 struct in6_addr *origin,
708 struct in6_addr *mcastgrp)
7bc570c8
YH
709{
710 int line = MFC6_HASH(mcastgrp, origin);
711 struct mfc6_cache *c;
712
8229efda 713 for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
7bc570c8
YH
714 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
715 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
716 break;
717 }
718 return c;
719}
720
721/*
722 * Allocate a multicast cache entry
723 */
58701ad4 724static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
7bc570c8 725{
36cbac59 726 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
7bc570c8
YH
727 if (c == NULL)
728 return NULL;
6ac7eb08 729 c->mfc_un.res.minvif = MAXMIFS;
58701ad4 730 mfc6_net_set(c, net);
7bc570c8
YH
731 return c;
732}
733
58701ad4 734static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
7bc570c8 735{
36cbac59 736 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
7bc570c8
YH
737 if (c == NULL)
738 return NULL;
7bc570c8
YH
739 skb_queue_head_init(&c->mfc_un.unres.unresolved);
740 c->mfc_un.unres.expires = jiffies + 10 * HZ;
58701ad4 741 mfc6_net_set(c, net);
7bc570c8
YH
742 return c;
743}
744
745/*
746 * A cache entry has gone into a resolved state from queued
747 */
748
749static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
750{
751 struct sk_buff *skb;
752
753 /*
754 * Play the pending entries through our router
755 */
756
757 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
758 if (ipv6_hdr(skb)->version == 0) {
759 int err;
760 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
761
762 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
549e028d 763 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
7bc570c8
YH
764 } else {
765 nlh->nlmsg_type = NLMSG_ERROR;
766 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
767 skb_trim(skb, nlh->nlmsg_len);
768 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
769 }
8229efda 770 err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
7bc570c8
YH
771 } else
772 ip6_mr_forward(skb, c);
773 }
774}
775
776/*
777 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
778 * expects the following bizarre scheme.
779 *
780 * Called under mrt_lock.
781 */
782
8229efda
BT
783static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
784 int assert)
7bc570c8
YH
785{
786 struct sk_buff *skb;
787 struct mrt6msg *msg;
788 int ret;
789
14fb64e1
YH
790#ifdef CONFIG_IPV6_PIMSM_V2
791 if (assert == MRT6MSG_WHOLEPKT)
792 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
793 +sizeof(*msg));
794 else
795#endif
796 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
7bc570c8
YH
797
798 if (!skb)
799 return -ENOBUFS;
800
801 /* I suppose that internal messages
802 * do not require checksums */
803
804 skb->ip_summed = CHECKSUM_UNNECESSARY;
805
14fb64e1
YH
806#ifdef CONFIG_IPV6_PIMSM_V2
807 if (assert == MRT6MSG_WHOLEPKT) {
808 /* Ugly, but we have no choice with this interface.
809 Duplicate old header, fix length etc.
810 And all this only to mangle msg->im6_msgtype and
811 to set msg->im6_mbz to "mbz" :-)
812 */
813 skb_push(skb, -skb_network_offset(pkt));
814
815 skb_push(skb, sizeof(*msg));
816 skb_reset_transport_header(skb);
817 msg = (struct mrt6msg *)skb_transport_header(skb);
818 msg->im6_mbz = 0;
819 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
8229efda 820 msg->im6_mif = net->ipv6.mroute_reg_vif_num;
14fb64e1
YH
821 msg->im6_pad = 0;
822 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
823 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
824
825 skb->ip_summed = CHECKSUM_UNNECESSARY;
826 } else
827#endif
828 {
7bc570c8
YH
829 /*
830 * Copy the IP header
831 */
832
833 skb_put(skb, sizeof(struct ipv6hdr));
834 skb_reset_network_header(skb);
835 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
836
837 /*
838 * Add our header
839 */
840 skb_put(skb, sizeof(*msg));
841 skb_reset_transport_header(skb);
842 msg = (struct mrt6msg *)skb_transport_header(skb);
843
844 msg->im6_mbz = 0;
845 msg->im6_msgtype = assert;
6ac7eb08 846 msg->im6_mif = mifi;
7bc570c8
YH
847 msg->im6_pad = 0;
848 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
849 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
850
851 skb->dst = dst_clone(pkt->dst);
852 skb->ip_summed = CHECKSUM_UNNECESSARY;
14fb64e1 853 }
7bc570c8 854
8229efda 855 if (net->ipv6.mroute6_sk == NULL) {
7bc570c8
YH
856 kfree_skb(skb);
857 return -EINVAL;
858 }
859
860 /*
861 * Deliver to user space multicast routing algorithms
862 */
8229efda 863 ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
bd91b8bf 864 if (ret < 0) {
7bc570c8
YH
865 if (net_ratelimit())
866 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
867 kfree_skb(skb);
868 }
869
870 return ret;
871}
872
873/*
874 * Queue a packet for resolution. It gets locked cache entry!
875 */
876
877static int
8229efda 878ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
7bc570c8
YH
879{
880 int err;
881 struct mfc6_cache *c;
882
883 spin_lock_bh(&mfc_unres_lock);
884 for (c = mfc_unres_queue; c; c = c->next) {
8229efda 885 if (net_eq(mfc6_net(c), net) &&
4045e57c 886 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
7bc570c8
YH
887 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
888 break;
889 }
890
891 if (c == NULL) {
892 /*
893 * Create a new entry if allowable
894 */
895
8229efda
BT
896 if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
897 (c = ip6mr_cache_alloc_unres(net)) == NULL) {
7bc570c8
YH
898 spin_unlock_bh(&mfc_unres_lock);
899
900 kfree_skb(skb);
901 return -ENOBUFS;
902 }
903
904 /*
905 * Fill in the new cache entry
906 */
907 c->mf6c_parent = -1;
908 c->mf6c_origin = ipv6_hdr(skb)->saddr;
909 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
910
911 /*
912 * Reflect first query at pim6sd
913 */
8229efda
BT
914 err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
915 if (err < 0) {
7bc570c8
YH
916 /* If the report failed throw the cache entry
917 out - Brad Parker
918 */
919 spin_unlock_bh(&mfc_unres_lock);
920
58701ad4 921 ip6mr_cache_free(c);
7bc570c8
YH
922 kfree_skb(skb);
923 return err;
924 }
925
8229efda 926 atomic_inc(&net->ipv6.cache_resolve_queue_len);
7bc570c8
YH
927 c->next = mfc_unres_queue;
928 mfc_unres_queue = c;
929
930 ipmr_do_expire_process(1);
931 }
932
933 /*
934 * See if we can append the packet
935 */
936 if (c->mfc_un.unres.unresolved.qlen > 3) {
937 kfree_skb(skb);
938 err = -ENOBUFS;
939 } else {
940 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
941 err = 0;
942 }
943
944 spin_unlock_bh(&mfc_unres_lock);
945 return err;
946}
947
948/*
949 * MFC6 cache manipulation by user space
950 */
951
8229efda 952static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
7bc570c8
YH
953{
954 int line;
955 struct mfc6_cache *c, **cp;
956
957 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
958
8229efda 959 for (cp = &net->ipv6.mfc6_cache_array[line];
4a6258a0 960 (c = *cp) != NULL; cp = &c->next) {
7bc570c8
YH
961 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
962 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
963 write_lock_bh(&mrt_lock);
964 *cp = c->next;
965 write_unlock_bh(&mrt_lock);
966
58701ad4 967 ip6mr_cache_free(c);
7bc570c8
YH
968 return 0;
969 }
970 }
971 return -ENOENT;
972}
973
974static int ip6mr_device_event(struct notifier_block *this,
975 unsigned long event, void *ptr)
976{
977 struct net_device *dev = ptr;
8229efda 978 struct net *net = dev_net(dev);
7bc570c8
YH
979 struct mif_device *v;
980 int ct;
981
7bc570c8
YH
982 if (event != NETDEV_UNREGISTER)
983 return NOTIFY_DONE;
984
8229efda
BT
985 v = &net->ipv6.vif6_table[0];
986 for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
7bc570c8 987 if (v->dev == dev)
8229efda 988 mif6_delete(net, ct);
7bc570c8
YH
989 }
990 return NOTIFY_DONE;
991}
992
993static struct notifier_block ip6_mr_notifier = {
994 .notifier_call = ip6mr_device_event
995};
996
997/*
998 * Setup for IP multicast routing
999 */
1000
4e16880c
BT
1001static int __net_init ip6mr_net_init(struct net *net)
1002{
1003 int err = 0;
4e16880c
BT
1004 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
1005 GFP_KERNEL);
1006 if (!net->ipv6.vif6_table) {
1007 err = -ENOMEM;
1008 goto fail;
1009 }
4a6258a0
BT
1010
1011 /* Forwarding cache */
1012 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
1013 sizeof(struct mfc6_cache *),
1014 GFP_KERNEL);
1015 if (!net->ipv6.mfc6_cache_array) {
1016 err = -ENOMEM;
1017 goto fail_mfc6_cache;
1018 }
950d5704
BT
1019
1020#ifdef CONFIG_IPV6_PIMSM_V2
1021 net->ipv6.mroute_reg_vif_num = -1;
1022#endif
8b90fc7e
BT
1023
1024#ifdef CONFIG_PROC_FS
1025 err = -ENOMEM;
1026 if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1027 goto proc_vif_fail;
1028 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1029 goto proc_cache_fail;
1030#endif
4a6258a0
BT
1031 return 0;
1032
8b90fc7e
BT
1033#ifdef CONFIG_PROC_FS
1034proc_cache_fail:
1035 proc_net_remove(net, "ip6_mr_vif");
1036proc_vif_fail:
1037 kfree(net->ipv6.mfc6_cache_array);
1038#endif
4a6258a0
BT
1039fail_mfc6_cache:
1040 kfree(net->ipv6.vif6_table);
4e16880c
BT
1041fail:
1042 return err;
1043}
1044
1045static void __net_exit ip6mr_net_exit(struct net *net)
1046{
8b90fc7e
BT
1047#ifdef CONFIG_PROC_FS
1048 proc_net_remove(net, "ip6_mr_cache");
1049 proc_net_remove(net, "ip6_mr_vif");
1050#endif
8229efda 1051 mroute_clean_tables(net);
4a6258a0 1052 kfree(net->ipv6.mfc6_cache_array);
4e16880c
BT
1053 kfree(net->ipv6.vif6_table);
1054}
1055
1056static struct pernet_operations ip6mr_net_ops = {
1057 .init = ip6mr_net_init,
1058 .exit = ip6mr_net_exit,
1059};
1060
623d1a1a 1061int __init ip6_mr_init(void)
7bc570c8 1062{
623d1a1a
WC
1063 int err;
1064
7bc570c8
YH
1065 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1066 sizeof(struct mfc6_cache),
1067 0, SLAB_HWCACHE_ALIGN,
1068 NULL);
1069 if (!mrt_cachep)
623d1a1a 1070 return -ENOMEM;
7bc570c8 1071
4e16880c
BT
1072 err = register_pernet_subsys(&ip6mr_net_ops);
1073 if (err)
1074 goto reg_pernet_fail;
1075
7bc570c8 1076 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
623d1a1a
WC
1077 err = register_netdevice_notifier(&ip6_mr_notifier);
1078 if (err)
1079 goto reg_notif_fail;
623d1a1a 1080 return 0;
87b30a65
BT
1081reg_notif_fail:
1082 del_timer(&ipmr_expire_timer);
4e16880c
BT
1083 unregister_pernet_subsys(&ip6mr_net_ops);
1084reg_pernet_fail:
87b30a65 1085 kmem_cache_destroy(mrt_cachep);
623d1a1a 1086 return err;
7bc570c8
YH
1087}
1088
623d1a1a
WC
1089void ip6_mr_cleanup(void)
1090{
623d1a1a
WC
1091 unregister_netdevice_notifier(&ip6_mr_notifier);
1092 del_timer(&ipmr_expire_timer);
4e16880c 1093 unregister_pernet_subsys(&ip6mr_net_ops);
623d1a1a
WC
1094 kmem_cache_destroy(mrt_cachep);
1095}
7bc570c8 1096
/*
 * Install or update a multicast forwarding cache entry from a userspace
 * MRT6_ADD_MFC request.  If an entry for (origin, group) already exists
 * its parent and TTL thresholds are updated in place; otherwise a new
 * entry is allocated, hashed in, and any matching entry on the
 * unresolved queue is resolved (its queued skbs sent) and freed.
 * @mrtsock: nonzero when the request came from the mroute socket itself;
 *           otherwise the entry is flagged MFC_STATIC.
 * Called under rtnl_lock.
 */
static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	/* 255 == "do not forward on this mif"; selected mifs get TTL 1. */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &net->ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		/* Existing entry: update under mrt_lock vs. the fast path. */
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	/* Publish at the head of the hash chain. */
	write_lock_bh(&mrt_lock);
	c->next = net->ipv6.mfc6_cache_array[line];
	net->ipv6.mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 * Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		/* Unresolved queue is global, so match the namespace too. */
		if (net_eq(mfc6_net(uc), net) &&
		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			atomic_dec(&net->ipv6.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	/* uc != NULL only when the loop above broke out on a match. */
	if (uc) {
		ip6mr_cache_resolve(uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}
1174
1175/*
1176 * Close the multicast socket, and clear the vif tables etc
1177 */
1178
/*
 * Wipe all non-static multicast routing state for @net: delete every
 * non-VIFF_STATIC vif, free every non-MFC_STATIC cache entry, and
 * destroy this namespace's entries on the unresolved queue.
 * Called under rtnl_lock when the mroute socket goes away.
 */
static void mroute_clean_tables(struct net *net)
{
	int i;

	/*
	 * Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv6.maxvif; i++) {
		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(net, i);
	}

	/*
	 * Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		struct mfc6_cache *c, **cp;

		cp = &net->ipv6.mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* Unlink under mrt_lock; free outside it. */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			/* The unresolved queue is shared; skip other netns. */
			if (!net_eq(mfc6_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;
			ip6mr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1227
/*
 * Register @sk as the (single) multicast-routing control socket of its
 * namespace and bump the mc_forwarding devconf counter.  Returns
 * -EADDRINUSE if another socket already holds the role.  mrt_lock
 * synchronises the assignment against the forwarding fast path.
 */
static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(net->ipv6.mroute6_sk == NULL)) {
		net->ipv6.mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	}
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}
1247
/*
 * Undo ip6mr_sk_init(): if @sk is the registered mroute socket, detach
 * it, decrement mc_forwarding, and wipe the namespace's non-static
 * routing tables.  Returns -EACCES if @sk is not the mroute socket.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		net->ipv6.mroute6_sk = NULL;
		net->ipv6.devconf_all->mc_forwarding--;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}
1267
1268/*
1269 * Socket options and virtual interface manipulation. The whole
1270 * virtual interface system is a complete heap, but unfortunately
1271 * that's how BSD mrouted happens to think. Maybe one day with a proper
1272 * MOSPF/PIM router set up we can clean this up.
1273 */
1274
1275int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1276{
1277 int ret;
1278 struct mif6ctl vif;
1279 struct mf6cctl mfc;
1280 mifi_t mifi;
8229efda 1281 struct net *net = sock_net(sk);
7bc570c8
YH
1282
1283 if (optname != MRT6_INIT) {
8229efda 1284 if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
7bc570c8
YH
1285 return -EACCES;
1286 }
1287
1288 switch (optname) {
1289 case MRT6_INIT:
1290 if (sk->sk_type != SOCK_RAW ||
1291 inet_sk(sk)->num != IPPROTO_ICMPV6)
1292 return -EOPNOTSUPP;
1293 if (optlen < sizeof(int))
1294 return -EINVAL;
1295
1296 return ip6mr_sk_init(sk);
1297
1298 case MRT6_DONE:
1299 return ip6mr_sk_done(sk);
1300
1301 case MRT6_ADD_MIF:
1302 if (optlen < sizeof(vif))
1303 return -EINVAL;
1304 if (copy_from_user(&vif, optval, sizeof(vif)))
1305 return -EFAULT;
6ac7eb08 1306 if (vif.mif6c_mifi >= MAXMIFS)
7bc570c8
YH
1307 return -ENFILE;
1308 rtnl_lock();
8229efda 1309 ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
7bc570c8
YH
1310 rtnl_unlock();
1311 return ret;
1312
1313 case MRT6_DEL_MIF:
1314 if (optlen < sizeof(mifi_t))
1315 return -EINVAL;
1316 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1317 return -EFAULT;
1318 rtnl_lock();
8229efda 1319 ret = mif6_delete(net, mifi);
7bc570c8
YH
1320 rtnl_unlock();
1321 return ret;
1322
1323 /*
1324 * Manipulate the forwarding caches. These live
1325 * in a sort of kernel/user symbiosis.
1326 */
1327 case MRT6_ADD_MFC:
1328 case MRT6_DEL_MFC:
1329 if (optlen < sizeof(mfc))
1330 return -EINVAL;
1331 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1332 return -EFAULT;
1333 rtnl_lock();
1334 if (optname == MRT6_DEL_MFC)
8229efda 1335 ret = ip6mr_mfc_delete(net, &mfc);
7bc570c8 1336 else
8229efda
BT
1337 ret = ip6mr_mfc_add(net, &mfc,
1338 sk == net->ipv6.mroute6_sk);
7bc570c8
YH
1339 rtnl_unlock();
1340 return ret;
1341
14fb64e1
YH
1342 /*
1343 * Control PIM assert (to activate pim will activate assert)
1344 */
1345 case MRT6_ASSERT:
1346 {
1347 int v;
1348 if (get_user(v, (int __user *)optval))
1349 return -EFAULT;
8229efda 1350 net->ipv6.mroute_do_assert = !!v;
14fb64e1
YH
1351 return 0;
1352 }
1353
1354#ifdef CONFIG_IPV6_PIMSM_V2
1355 case MRT6_PIM:
1356 {
a9f83bf3 1357 int v;
14fb64e1
YH
1358 if (get_user(v, (int __user *)optval))
1359 return -EFAULT;
1360 v = !!v;
1361 rtnl_lock();
1362 ret = 0;
8229efda
BT
1363 if (v != net->ipv6.mroute_do_pim) {
1364 net->ipv6.mroute_do_pim = v;
1365 net->ipv6.mroute_do_assert = v;
1366 if (net->ipv6.mroute_do_pim)
14fb64e1
YH
1367 ret = inet6_add_protocol(&pim6_protocol,
1368 IPPROTO_PIM);
1369 else
1370 ret = inet6_del_protocol(&pim6_protocol,
1371 IPPROTO_PIM);
1372 if (ret < 0)
1373 ret = -EAGAIN;
1374 }
1375 rtnl_unlock();
1376 return ret;
1377 }
1378
1379#endif
7bc570c8 1380 /*
7d120c55 1381 * Spurious command, or MRT6_VERSION which you cannot
7bc570c8
YH
1382 * set.
1383 */
1384 default:
1385 return -ENOPROTOOPT;
1386 }
1387}
1388
1389/*
1390 * Getsock opt support for the multicast routing system.
1391 */
1392
1393int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1394 int __user *optlen)
1395{
1396 int olr;
1397 int val;
8229efda 1398 struct net *net = sock_net(sk);
7bc570c8
YH
1399
1400 switch (optname) {
1401 case MRT6_VERSION:
1402 val = 0x0305;
1403 break;
14fb64e1
YH
1404#ifdef CONFIG_IPV6_PIMSM_V2
1405 case MRT6_PIM:
8229efda 1406 val = net->ipv6.mroute_do_pim;
14fb64e1
YH
1407 break;
1408#endif
1409 case MRT6_ASSERT:
8229efda 1410 val = net->ipv6.mroute_do_assert;
14fb64e1 1411 break;
7bc570c8
YH
1412 default:
1413 return -ENOPROTOOPT;
1414 }
1415
1416 if (get_user(olr, optlen))
1417 return -EFAULT;
1418
1419 olr = min_t(int, olr, sizeof(int));
1420 if (olr < 0)
1421 return -EINVAL;
1422
1423 if (put_user(olr, optlen))
1424 return -EFAULT;
1425 if (copy_to_user(optval, &val, olr))
1426 return -EFAULT;
1427 return 0;
1428}
1429
1430/*
1431 * The IP multicast ioctl support routines.
1432 */
1433
/*
 * The IP multicast ioctl support routines: report per-mif packet/byte
 * counters (SIOCGETMIFCNT_IN6) and per-(source, group) forwarding
 * statistics (SIOCGETSGCNT_IN6).  mrt_lock is dropped before the
 * copy_to_user, after the counters have been snapshotted into locals.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= net->ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(net, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1485
1486
/* Netfilter okfn for ip6mr_forward2(): count the forwarded datagram
 * and hand the skb to the output path of its routing dst. */
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}
1493
1494/*
1495 * Processing handlers for ip6mr_forward
1496 */
1497
/*
 * Transmit one copy of @skb out mif @vifi of cache entry @c.
 * Register mifs do not transmit: the whole packet is handed to the
 * userspace daemon via MRT6MSG_WHOLEPKT.  For real devices, a route
 * is looked up for the destination, the hop limit is decremented,
 * and the packet goes through the NF_INET_FORWARD hook.
 * Consumes @skb on every path.
 */
static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct net *net = mfc6_net(c);
	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		/* Register mif: account and report to the PIM daemon. */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(net, NULL, &fl);
	if (!dst)
		goto out_free;

	/* Replace the input route with the output route. */
	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* skb_cow may have reallocated the header; refetch it. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1570
1571static int ip6mr_find_vif(struct net_device *dev)
1572{
8229efda 1573 struct net *net = dev_net(dev);
7bc570c8 1574 int ct;
8229efda
BT
1575 for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
1576 if (net->ipv6.vif6_table[ct].dev == dev)
7bc570c8
YH
1577 break;
1578 }
1579 return ct;
1580}
1581
/*
 * Forward @skb according to cache entry @cache: verify it arrived on
 * the expected parent mif (sending a PIM assert / WRONGMIF report
 * otherwise), then replicate it to every mif whose TTL threshold the
 * packet's hop limit exceeds.  The last copy reuses @skb itself;
 * earlier copies are clones.  Consumes @skb.  Called with mrt_lock
 * held for read.
 */
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc6_net(cache);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (net->ipv6.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			/* Rate-limited by MFC_ASSERT_THRESH. */
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	net->ipv6.vif6_table[vif].pkt_in++;
	net->ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			/* Defer sending by one iteration so the final
			 * destination can take the original skb. */
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
1642
1643
1644/*
1645 * Multicast packets for forwarding arrive here
1646 */
1647
/*
 * Entry point for multicast packets needing forwarding.  Looks up the
 * (source, group) cache entry and forwards; with no entry, the packet
 * is queued as unresolved (reported to the daemon) if the ingress
 * device is a known mif, or dropped otherwise.  Consumes @skb.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(net,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 * No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
1681
1682
/*
 * Encode cache entry @c into @skb as rtnetlink attributes: RTA_IIF for
 * the parent mif's device and an RTA_MULTIPATH nexthop list with one
 * entry per output mif (ttl < 255).  Sets rtm->rtm_type to
 * RTN_MULTICAST.  Returns 1 on success, -EMSGSIZE (after trimming the
 * partial output) if the skb runs out of tailroom.
 * Called with mrt_lock held for read.
 */
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc6_net(c);
	struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			/* Abuse rtnh_hops to carry the TTL threshold. */
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	/* Patch in the final multipath attribute length. */
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
1718
8229efda
BT
/*
 * RTM_GETROUTE support: fill @skb with multicast route info for the
 * rt6_info attached to skb->dst.  If no cache entry exists and @nowait
 * is clear, a minimal IPv6 header (version 0 marks it as synthetic) is
 * built in a fresh skb and queued as an unresolved request so the
 * daemon can resolve it; with @nowait set, -EAGAIN is returned instead.
 */
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Synthesize a minimal header carrying only src/dst;
		 * version 0 so it is never mistaken for a real packet. */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1783