/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)

static int mroute_do_assert;			/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc_unres_queue;	/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;	/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
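
/* In practice: resolved-cache readers take read_lock(&mrt_lock),
 * updates happen in process context under write_lock_bh(&mrt_lock),
 * and mfc_unres_queue is only ever touched under
 * spin_lock_bh(&mfc_unres_lock).
 */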

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};

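/* Position the iterator at entry @pos: walk the resolved hash table
 * first (under mrt_lock), then the unresolved queue (under
 * mfc_unres_lock); it->cache records which list - and therefore which
 * lock - is currently held.
 */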
static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = init_net.ipv6.mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
		for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}



/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
		if (!MIF_EXISTS(&init_net, iter->ct))
			continue;
		if (pos-- == 0)
			return &init_net.ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < init_net.ipv6.maxvif) {
		if (!MIF_EXISTS(&init_net, iter->ct))
			continue;
		return &init_net.ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

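/* One line per configured MIF; an illustrative (not captured) example
 * of the resulting /proc/net/ip6_mr_vif output:
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           4561      27      8290      41 00000
 */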
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - init_net.ipv6.vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip6mr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

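/* Advance the iterator. When the resolved hash chains are exhausted,
 * mrt_lock is dropped and mfc_unres_lock taken before walking the
 * unresolved queue; ipmr_mfc_seq_stop() below releases whichever lock
 * it->cache says is still held.
 */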
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);

	while (++it->ct < MFC6_LINES) {
		mfc = init_net.ipv6.mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == init_net.ipv6.mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;

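/* Handler for IPPROTO_PIM packets, registered while MRT6_PIM is on:
 * validate the PIM REGISTER header, strip it, and feed the inner IPv6
 * multicast packet back to the stack via the pim6reg device.
 */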
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* inner packet is IPv6, not IPv4 */
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

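/* The reduced MTU makes room for the outer IPv6 header plus the
 * 8-byte PIM register header added when packets are tunnelled to the
 * rendezvous point.
 */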
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;
	if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv6.vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == init_net.ipv6.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv6.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	release_net(mfc6_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Single timer process for all the unresolved queue. */

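/* ipmr_do_expire_process() reaps entries whose 10 s resolution window
 * has passed and re-arms the timer for the earliest remaining expiry;
 * ipmr_expire_process() is the timer callback, backing off by one
 * jiffy when it cannot take mfc_unres_lock.
 */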
static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

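/* A ttl below 255 marks a mif as an output interface for this entry;
 * minvif/maxvif simply bound the range ip6_mr_forward() has to scan.
 */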
static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
		if (MIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > init_net.ipv6.maxvif)
		init_net.ipv6.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	mfc6_net_set(c, net);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	mfc6_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

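/* The daemon receives these as struct mrt6msg "packets" read from its
 * MRT6_INIT raw socket: im6_msgtype distinguishes MRT6MSG_NOCACHE,
 * MRT6MSG_WRONGMIF and (for PIM) MRT6MSG_WHOLEPKT notifications.
 */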
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->dst = dst_clone(pkt->dst);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (init_net.ipv6.mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

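/* At most 10 entries may sit in the unresolved queue, and each entry
 * buffers no more than 3 skbs, so an unresponsive daemon cannot pin
 * unbounded amounts of packet memory.
 */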
static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &init_net.ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &init_net.ipv6.vif6_table[0];
	for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

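/* Per-namespace state (the mif table and the MFC hash array) is
 * allocated here rather than statically, so each struct net carries
 * its own tables.
 */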
static int __net_init ip6mr_net_init(struct net *net)
{
	int err = 0;

	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
				       GFP_KERNEL);
	if (!net->ipv6.vif6_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
					     sizeof(struct mfc6_cache *),
					     GFP_KERNEL);
	if (!net->ipv6.mfc6_cache_array) {
		err = -ENOMEM;
		goto fail_mfc6_cache;
	}
	return 0;

fail_mfc6_cache:
	kfree(net->ipv6.vif6_table);
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
	kfree(net->ipv6.mfc6_cache_array);
	kfree(net->ipv6.vif6_table);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
				  0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip6_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &init_net.ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = init_net.ipv6.mfc6_cache_array[line];
	init_net.ipv6.mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv6.maxvif; i++) {
		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		struct mfc6_cache *c, **cp;

		cp = &init_net.ipv6.mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(init_net.ipv6.mroute6_sk == NULL))
		init_net.ipv6.mroute6_sk = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == init_net.ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		init_net.ipv6.mroute6_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

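/* Sketch of how a routing daemon drives this interface from userspace
 * (option names from <linux/mroute6.h>; error handling omitted, and
 * the IPPROTO_IPV6 level is this author's reading of the raw-socket
 * plumbing rather than something stated in this file):
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 */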
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= init_net.ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(&init_net, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicasting
	 * program, that program should receive packets regardless of which
	 * interface it has joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
		if (init_net.ipv6.vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}
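
/* Forward to every mif whose configured threshold the packet's hop
 * limit exceeds. The psend trick below clones the skb for all matches
 * but the last one, which consumes the original - so exactly one copy
 * is made per extra output interface.
 */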

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	init_net.ipv6.vif6_table[vif].pkt_in++;
	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

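/* Entry point from the IPv6 receive path once multicast forwarding is
 * active: resolved flows are forwarded directly, anything else is
 * parked on the unresolved queue and reported to the daemon.
 */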
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}


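/* Render one MFC entry as an rtnetlink route: RTA_IIF carries the
 * parent device, and each output interface becomes an RTA_MULTIPATH
 * nexthop whose rtnh_hops holds the configured threshold.
 */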
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}