/* net/ipv6/ip6mr.c — annotated (git blame) view; web-viewer navigation header removed. */
7bc570c8
YH
1/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
7bc570c8
YH
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
7bc570c8
YH
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
7bc570c8 35#include <linux/init.h>
7bc570c8
YH
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
7bc570c8 39#include <net/raw.h>
7bc570c8
YH
40#include <linux/notifier.h>
41#include <linux/if_arp.h>
7bc570c8
YH
42#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
14fb64e1 48#include <linux/pim.h>
7bc570c8
YH
49#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
52struct sock *mroute6_socket;
53
54
55/* Big lock, protecting vif table, mrt cache and mroute socket state.
56 Note that the changes are semaphored via rtnl_lock.
57 */
58
59static DEFINE_RWLOCK(mrt_lock);
60
61/*
62 * Multicast router control variables
63 */
64
65static struct mif_device vif6_table[MAXMIFS]; /* Devices */
66static int maxvif;
67
68#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)
69
14fb64e1
YH
70static int mroute_do_assert; /* Set in PIM assert */
71#ifdef CONFIG_IPV6_PIMSM_V2
72static int mroute_do_pim;
73#else
74#define mroute_do_pim 0
75#endif
76
6ac7eb08 77static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */
7bc570c8
YH
78
79static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
80static atomic_t cache_resolve_queue_len; /* Size of unresolved */
81
82/* Special spinlock for queue of unresolved entries */
83static DEFINE_SPINLOCK(mfc_unres_lock);
84
85/* We return to original Alan's scheme. Hash table of resolved
86 entries is changed only in process context and protected
87 with weak lock mrt_lock. Queue of unresolved entries is protected
88 with strong spinlock mfc_unres_lock.
89
90 In this case data path is free of exclusive locks at all.
91 */
92
93static struct kmem_cache *mrt_cachep __read_mostly;
94
95static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
6ac7eb08 96static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
7bc570c8
YH
97static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
98
14fb64e1
YH
99#ifdef CONFIG_IPV6_PIMSM_V2
100static struct inet6_protocol pim6_protocol;
101#endif
102
7bc570c8
YH
103static struct timer_list ipmr_expire_timer;
104
105
106#ifdef CONFIG_PROC_FS
107
/* Iterator state for the /proc ip6_mr_cache seq_file walk. */
struct ipmr_mfc_iter {
	struct mfc6_cache **cache;	/* mfc6_cache_array or &mfc_unres_queue */
	int ct;				/* current hash-bucket index */
};
112
113
114static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
115{
116 struct mfc6_cache *mfc;
117
118 it->cache = mfc6_cache_array;
119 read_lock(&mrt_lock);
120 for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
121 for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
122 if (pos-- == 0)
123 return mfc;
124 read_unlock(&mrt_lock);
125
126 it->cache = &mfc_unres_queue;
127 spin_lock_bh(&mfc_unres_lock);
128 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
129 if (pos-- == 0)
130 return mfc;
131 spin_unlock_bh(&mfc_unres_lock);
132
133 it->cache = NULL;
134 return NULL;
135}
136
137
138
139
140/*
141 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
142 */
143
/* Iterator state for the /proc ip6_mr_vif seq_file walk. */
struct ipmr_vif_iter {
	int ct;		/* current index into vif6_table */
};
147
148static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
149 loff_t pos)
150{
151 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
152 if (!MIF_EXISTS(iter->ct))
153 continue;
154 if (pos-- == 0)
155 return &vif6_table[iter->ct];
156 }
157 return NULL;
158}
159
160static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
161 __acquires(mrt_lock)
162{
163 read_lock(&mrt_lock);
164 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
165 : SEQ_START_TOKEN);
166}
167
168static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
169{
170 struct ipmr_vif_iter *iter = seq->private;
171
172 ++*pos;
173 if (v == SEQ_START_TOKEN)
174 return ip6mr_vif_seq_idx(iter, 0);
175
176 while (++iter->ct < maxvif) {
177 if (!MIF_EXISTS(iter->ct))
178 continue;
179 return &vif6_table[iter->ct];
180 }
181 return NULL;
182}
183
184static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
185 __releases(mrt_lock)
186{
187 read_unlock(&mrt_lock);
188}
189
190static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
191{
192 if (v == SEQ_START_TOKEN) {
193 seq_puts(seq,
194 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
195 } else {
196 const struct mif_device *vif = v;
197 const char *name = vif->dev ? vif->dev->name : "none";
198
199 seq_printf(seq,
d430a227 200 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
7bc570c8
YH
201 vif - vif6_table,
202 name, vif->bytes_in, vif->pkt_in,
203 vif->bytes_out, vif->pkt_out,
204 vif->flags);
205 }
206 return 0;
207}
208
209static struct seq_operations ip6mr_vif_seq_ops = {
210 .start = ip6mr_vif_seq_start,
211 .next = ip6mr_vif_seq_next,
212 .stop = ip6mr_vif_seq_stop,
213 .show = ip6mr_vif_seq_show,
214};
215
216static int ip6mr_vif_open(struct inode *inode, struct file *file)
217{
218 return seq_open_private(file, &ip6mr_vif_seq_ops,
219 sizeof(struct ipmr_vif_iter));
220}
221
222static struct file_operations ip6mr_vif_fops = {
223 .owner = THIS_MODULE,
224 .open = ip6mr_vif_open,
225 .read = seq_read,
226 .llseek = seq_lseek,
227 .release = seq_release,
228};
229
230static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
231{
232 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
233 : SEQ_START_TOKEN);
234}
235
236static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
237{
238 struct mfc6_cache *mfc = v;
239 struct ipmr_mfc_iter *it = seq->private;
240
241 ++*pos;
242
243 if (v == SEQ_START_TOKEN)
244 return ipmr_mfc_seq_idx(seq->private, 0);
245
246 if (mfc->next)
247 return mfc->next;
248
249 if (it->cache == &mfc_unres_queue)
250 goto end_of_list;
251
252 BUG_ON(it->cache != mfc6_cache_array);
253
254 while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
255 mfc = mfc6_cache_array[it->ct];
256 if (mfc)
257 return mfc;
258 }
259
260 /* exhausted cache_array, show unresolved */
261 read_unlock(&mrt_lock);
262 it->cache = &mfc_unres_queue;
263 it->ct = 0;
264
265 spin_lock_bh(&mfc_unres_lock);
266 mfc = mfc_unres_queue;
267 if (mfc)
268 return mfc;
269
270 end_of_list:
271 spin_unlock_bh(&mfc_unres_lock);
272 it->cache = NULL;
273
274 return NULL;
275}
276
277static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
278{
279 struct ipmr_mfc_iter *it = seq->private;
280
281 if (it->cache == &mfc_unres_queue)
282 spin_unlock_bh(&mfc_unres_lock);
283 else if (it->cache == mfc6_cache_array)
284 read_unlock(&mrt_lock);
285}
286
287static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
288{
289 int n;
290
291 if (v == SEQ_START_TOKEN) {
292 seq_puts(seq,
293 "Group "
294 "Origin "
295 "Iif Pkts Bytes Wrong Oifs\n");
296 } else {
297 const struct mfc6_cache *mfc = v;
298 const struct ipmr_mfc_iter *it = seq->private;
299
300 seq_printf(seq,
301 NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
302 NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
303 mfc->mf6c_parent,
304 mfc->mfc_un.res.pkt,
305 mfc->mfc_un.res.bytes,
306 mfc->mfc_un.res.wrong_if);
307
308 if (it->cache != &mfc_unres_queue) {
309 for (n = mfc->mfc_un.res.minvif;
310 n < mfc->mfc_un.res.maxvif; n++) {
311 if (MIF_EXISTS(n) &&
312 mfc->mfc_un.res.ttls[n] < 255)
313 seq_printf(seq,
314 " %2d:%-3d",
315 n, mfc->mfc_un.res.ttls[n]);
316 }
317 }
318 seq_putc(seq, '\n');
319 }
320 return 0;
321}
322
323static struct seq_operations ipmr_mfc_seq_ops = {
324 .start = ipmr_mfc_seq_start,
325 .next = ipmr_mfc_seq_next,
326 .stop = ipmr_mfc_seq_stop,
327 .show = ipmr_mfc_seq_show,
328};
329
330static int ipmr_mfc_open(struct inode *inode, struct file *file)
331{
332 return seq_open_private(file, &ipmr_mfc_seq_ops,
333 sizeof(struct ipmr_mfc_iter));
334}
335
336static struct file_operations ip6mr_mfc_fops = {
337 .owner = THIS_MODULE,
338 .open = ipmr_mfc_open,
339 .read = seq_read,
340 .llseek = seq_lseek,
341 .release = seq_release,
342};
343#endif
344
14fb64e1
YH
345#ifdef CONFIG_IPV6_PIMSM_V2
346static int reg_vif_num = -1;
347
348static int pim6_rcv(struct sk_buff *skb)
349{
350 struct pimreghdr *pim;
351 struct ipv6hdr *encap;
352 struct net_device *reg_dev = NULL;
353
354 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
355 goto drop;
356
357 pim = (struct pimreghdr *)skb_transport_header(skb);
358 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
359 (pim->flags & PIM_NULL_REGISTER) ||
360 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
ec6b486f 361 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
14fb64e1
YH
362 goto drop;
363
364 /* check if the inner packet is destined to mcast group */
365 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
366 sizeof(*pim));
367
368 if (!ipv6_addr_is_multicast(&encap->daddr) ||
369 encap->payload_len == 0 ||
370 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
371 goto drop;
372
373 read_lock(&mrt_lock);
374 if (reg_vif_num >= 0)
375 reg_dev = vif6_table[reg_vif_num].dev;
376 if (reg_dev)
377 dev_hold(reg_dev);
378 read_unlock(&mrt_lock);
379
380 if (reg_dev == NULL)
381 goto drop;
382
383 skb->mac_header = skb->network_header;
384 skb_pull(skb, (u8 *)encap - skb->data);
385 skb_reset_network_header(skb);
386 skb->dev = reg_dev;
387 skb->protocol = htons(ETH_P_IP);
388 skb->ip_summed = 0;
389 skb->pkt_type = PACKET_HOST;
390 dst_release(skb->dst);
dc58c78c
PE
391 reg_dev->stats.rx_bytes += skb->len;
392 reg_dev->stats.rx_packets++;
14fb64e1
YH
393 skb->dst = NULL;
394 nf_reset(skb);
395 netif_rx(skb);
396 dev_put(reg_dev);
397 return 0;
398 drop:
399 kfree_skb(skb);
400 return 0;
401}
402
403static struct inet6_protocol pim6_protocol = {
404 .handler = pim6_rcv,
405};
406
407/* Service routines creating virtual interfaces: PIMREG */
408
409static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
410{
411 read_lock(&mrt_lock);
dc58c78c
PE
412 dev->stats.tx_bytes += skb->len;
413 dev->stats.tx_packets++;
14fb64e1
YH
414 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
415 read_unlock(&mrt_lock);
416 kfree_skb(skb);
417 return 0;
418}
419
14fb64e1
YH
420static void reg_vif_setup(struct net_device *dev)
421{
422 dev->type = ARPHRD_PIMREG;
423 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
424 dev->flags = IFF_NOARP;
425 dev->hard_start_xmit = reg_vif_xmit;
14fb64e1
YH
426 dev->destructor = free_netdev;
427}
428
429static struct net_device *ip6mr_reg_vif(void)
430{
431 struct net_device *dev;
14fb64e1 432
dc58c78c 433 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
14fb64e1
YH
434 if (dev == NULL)
435 return NULL;
436
437 if (register_netdevice(dev)) {
438 free_netdev(dev);
439 return NULL;
440 }
441 dev->iflink = 0;
442
14fb64e1
YH
443 if (dev_open(dev))
444 goto failure;
445
446 return dev;
447
448failure:
449 /* allow the register to be completed before unregistering. */
450 rtnl_unlock();
451 rtnl_lock();
452
453 unregister_netdevice(dev);
454 return NULL;
455}
456#endif
457
7bc570c8
YH
458/*
459 * Delete a VIF entry
460 */
461
462static int mif6_delete(int vifi)
463{
464 struct mif_device *v;
465 struct net_device *dev;
466 if (vifi < 0 || vifi >= maxvif)
467 return -EADDRNOTAVAIL;
468
469 v = &vif6_table[vifi];
470
471 write_lock_bh(&mrt_lock);
472 dev = v->dev;
473 v->dev = NULL;
474
475 if (!dev) {
476 write_unlock_bh(&mrt_lock);
477 return -EADDRNOTAVAIL;
478 }
479
14fb64e1
YH
480#ifdef CONFIG_IPV6_PIMSM_V2
481 if (vifi == reg_vif_num)
482 reg_vif_num = -1;
483#endif
484
7bc570c8
YH
485 if (vifi + 1 == maxvif) {
486 int tmp;
487 for (tmp = vifi - 1; tmp >= 0; tmp--) {
488 if (MIF_EXISTS(tmp))
489 break;
490 }
491 maxvif = tmp + 1;
492 }
493
494 write_unlock_bh(&mrt_lock);
495
496 dev_set_allmulti(dev, -1);
497
498 if (v->flags & MIFF_REGISTER)
499 unregister_netdevice(dev);
500
501 dev_put(dev);
502 return 0;
503}
504
505/* Destroy an unresolved cache entry, killing queued skbs
506 and reporting error to netlink readers.
507 */
508
509static void ip6mr_destroy_unres(struct mfc6_cache *c)
510{
511 struct sk_buff *skb;
512
513 atomic_dec(&cache_resolve_queue_len);
514
515 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
516 if (ipv6_hdr(skb)->version == 0) {
517 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
518 nlh->nlmsg_type = NLMSG_ERROR;
519 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
520 skb_trim(skb, nlh->nlmsg_len);
521 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
522 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
523 } else
524 kfree_skb(skb);
525 }
526
527 kmem_cache_free(mrt_cachep, c);
528}
529
530
531/* Single timer process for all the unresolved queue. */
532
533static void ipmr_do_expire_process(unsigned long dummy)
534{
535 unsigned long now = jiffies;
536 unsigned long expires = 10 * HZ;
537 struct mfc6_cache *c, **cp;
538
539 cp = &mfc_unres_queue;
540
541 while ((c = *cp) != NULL) {
542 if (time_after(c->mfc_un.unres.expires, now)) {
543 /* not yet... */
544 unsigned long interval = c->mfc_un.unres.expires - now;
545 if (interval < expires)
546 expires = interval;
547 cp = &c->next;
548 continue;
549 }
550
551 *cp = c->next;
552 ip6mr_destroy_unres(c);
553 }
554
555 if (atomic_read(&cache_resolve_queue_len))
556 mod_timer(&ipmr_expire_timer, jiffies + expires);
557}
558
559static void ipmr_expire_process(unsigned long dummy)
560{
561 if (!spin_trylock(&mfc_unres_lock)) {
562 mod_timer(&ipmr_expire_timer, jiffies + 1);
563 return;
564 }
565
566 if (atomic_read(&cache_resolve_queue_len))
567 ipmr_do_expire_process(dummy);
568
569 spin_unlock(&mfc_unres_lock);
570}
571
572/* Fill oifs list. It is called under write locked mrt_lock. */
573
574static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
575{
576 int vifi;
577
6ac7eb08 578 cache->mfc_un.res.minvif = MAXMIFS;
7bc570c8 579 cache->mfc_un.res.maxvif = 0;
6ac7eb08 580 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
7bc570c8
YH
581
582 for (vifi = 0; vifi < maxvif; vifi++) {
583 if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
584 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
585 if (cache->mfc_un.res.minvif > vifi)
586 cache->mfc_un.res.minvif = vifi;
587 if (cache->mfc_un.res.maxvif <= vifi)
588 cache->mfc_un.res.maxvif = vifi + 1;
589 }
590 }
591}
592
593static int mif6_add(struct mif6ctl *vifc, int mrtsock)
594{
595 int vifi = vifc->mif6c_mifi;
596 struct mif_device *v = &vif6_table[vifi];
597 struct net_device *dev;
5ae7b444 598 int err;
7bc570c8
YH
599
600 /* Is vif busy ? */
601 if (MIF_EXISTS(vifi))
602 return -EADDRINUSE;
603
604 switch (vifc->mif6c_flags) {
14fb64e1
YH
605#ifdef CONFIG_IPV6_PIMSM_V2
606 case MIFF_REGISTER:
607 /*
608 * Special Purpose VIF in PIM
609 * All the packets will be sent to the daemon
610 */
611 if (reg_vif_num >= 0)
612 return -EADDRINUSE;
613 dev = ip6mr_reg_vif();
614 if (!dev)
615 return -ENOBUFS;
5ae7b444
WC
616 err = dev_set_allmulti(dev, 1);
617 if (err) {
618 unregister_netdevice(dev);
619 return err;
620 }
14fb64e1
YH
621 break;
622#endif
7bc570c8
YH
623 case 0:
624 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
625 if (!dev)
626 return -EADDRNOTAVAIL;
627 dev_put(dev);
5ae7b444
WC
628 err = dev_set_allmulti(dev, 1);
629 if (err)
630 return err;
7bc570c8
YH
631 break;
632 default:
633 return -EINVAL;
634 }
635
7bc570c8
YH
636 /*
637 * Fill in the VIF structures
638 */
639 v->rate_limit = vifc->vifc_rate_limit;
640 v->flags = vifc->mif6c_flags;
641 if (!mrtsock)
642 v->flags |= VIFF_STATIC;
643 v->threshold = vifc->vifc_threshold;
644 v->bytes_in = 0;
645 v->bytes_out = 0;
646 v->pkt_in = 0;
647 v->pkt_out = 0;
648 v->link = dev->ifindex;
649 if (v->flags & MIFF_REGISTER)
650 v->link = dev->iflink;
651
652 /* And finish update writing critical data */
653 write_lock_bh(&mrt_lock);
654 dev_hold(dev);
655 v->dev = dev;
14fb64e1
YH
656#ifdef CONFIG_IPV6_PIMSM_V2
657 if (v->flags & MIFF_REGISTER)
658 reg_vif_num = vifi;
659#endif
7bc570c8
YH
660 if (vifi + 1 > maxvif)
661 maxvif = vifi + 1;
662 write_unlock_bh(&mrt_lock);
663 return 0;
664}
665
666static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
667{
668 int line = MFC6_HASH(mcastgrp, origin);
669 struct mfc6_cache *c;
670
671 for (c = mfc6_cache_array[line]; c; c = c->next) {
672 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
673 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
674 break;
675 }
676 return c;
677}
678
679/*
680 * Allocate a multicast cache entry
681 */
682static struct mfc6_cache *ip6mr_cache_alloc(void)
683{
684 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
685 if (c == NULL)
686 return NULL;
687 memset(c, 0, sizeof(*c));
6ac7eb08 688 c->mfc_un.res.minvif = MAXMIFS;
7bc570c8
YH
689 return c;
690}
691
692static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
693{
694 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
695 if (c == NULL)
696 return NULL;
697 memset(c, 0, sizeof(*c));
698 skb_queue_head_init(&c->mfc_un.unres.unresolved);
699 c->mfc_un.unres.expires = jiffies + 10 * HZ;
700 return c;
701}
702
703/*
704 * A cache entry has gone into a resolved state from queued
705 */
706
707static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
708{
709 struct sk_buff *skb;
710
711 /*
712 * Play the pending entries through our router
713 */
714
715 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
716 if (ipv6_hdr(skb)->version == 0) {
717 int err;
718 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
719
720 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
549e028d 721 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
7bc570c8
YH
722 } else {
723 nlh->nlmsg_type = NLMSG_ERROR;
724 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
725 skb_trim(skb, nlh->nlmsg_len);
726 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
727 }
728 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
729 } else
730 ip6_mr_forward(skb, c);
731 }
732}
733
734/*
735 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
736 * expects the following bizarre scheme.
737 *
738 * Called under mrt_lock.
739 */
740
6ac7eb08 741static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
7bc570c8
YH
742{
743 struct sk_buff *skb;
744 struct mrt6msg *msg;
745 int ret;
746
14fb64e1
YH
747#ifdef CONFIG_IPV6_PIMSM_V2
748 if (assert == MRT6MSG_WHOLEPKT)
749 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
750 +sizeof(*msg));
751 else
752#endif
753 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
7bc570c8
YH
754
755 if (!skb)
756 return -ENOBUFS;
757
758 /* I suppose that internal messages
759 * do not require checksums */
760
761 skb->ip_summed = CHECKSUM_UNNECESSARY;
762
14fb64e1
YH
763#ifdef CONFIG_IPV6_PIMSM_V2
764 if (assert == MRT6MSG_WHOLEPKT) {
765 /* Ugly, but we have no choice with this interface.
766 Duplicate old header, fix length etc.
767 And all this only to mangle msg->im6_msgtype and
768 to set msg->im6_mbz to "mbz" :-)
769 */
770 skb_push(skb, -skb_network_offset(pkt));
771
772 skb_push(skb, sizeof(*msg));
773 skb_reset_transport_header(skb);
774 msg = (struct mrt6msg *)skb_transport_header(skb);
775 msg->im6_mbz = 0;
776 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
777 msg->im6_mif = reg_vif_num;
778 msg->im6_pad = 0;
779 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
780 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
781
782 skb->ip_summed = CHECKSUM_UNNECESSARY;
783 } else
784#endif
785 {
7bc570c8
YH
786 /*
787 * Copy the IP header
788 */
789
790 skb_put(skb, sizeof(struct ipv6hdr));
791 skb_reset_network_header(skb);
792 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
793
794 /*
795 * Add our header
796 */
797 skb_put(skb, sizeof(*msg));
798 skb_reset_transport_header(skb);
799 msg = (struct mrt6msg *)skb_transport_header(skb);
800
801 msg->im6_mbz = 0;
802 msg->im6_msgtype = assert;
6ac7eb08 803 msg->im6_mif = mifi;
7bc570c8
YH
804 msg->im6_pad = 0;
805 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
806 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
807
808 skb->dst = dst_clone(pkt->dst);
809 skb->ip_summed = CHECKSUM_UNNECESSARY;
810
811 skb_pull(skb, sizeof(struct ipv6hdr));
14fb64e1 812 }
7bc570c8
YH
813
814 if (mroute6_socket == NULL) {
815 kfree_skb(skb);
816 return -EINVAL;
817 }
818
819 /*
820 * Deliver to user space multicast routing algorithms
821 */
822 if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
823 if (net_ratelimit())
824 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
825 kfree_skb(skb);
826 }
827
828 return ret;
829}
830
831/*
832 * Queue a packet for resolution. It gets locked cache entry!
833 */
834
835static int
6ac7eb08 836ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
7bc570c8
YH
837{
838 int err;
839 struct mfc6_cache *c;
840
841 spin_lock_bh(&mfc_unres_lock);
842 for (c = mfc_unres_queue; c; c = c->next) {
843 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
844 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
845 break;
846 }
847
848 if (c == NULL) {
849 /*
850 * Create a new entry if allowable
851 */
852
853 if (atomic_read(&cache_resolve_queue_len) >= 10 ||
854 (c = ip6mr_cache_alloc_unres()) == NULL) {
855 spin_unlock_bh(&mfc_unres_lock);
856
857 kfree_skb(skb);
858 return -ENOBUFS;
859 }
860
861 /*
862 * Fill in the new cache entry
863 */
864 c->mf6c_parent = -1;
865 c->mf6c_origin = ipv6_hdr(skb)->saddr;
866 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
867
868 /*
869 * Reflect first query at pim6sd
870 */
6ac7eb08 871 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
7bc570c8
YH
872 /* If the report failed throw the cache entry
873 out - Brad Parker
874 */
875 spin_unlock_bh(&mfc_unres_lock);
876
877 kmem_cache_free(mrt_cachep, c);
878 kfree_skb(skb);
879 return err;
880 }
881
882 atomic_inc(&cache_resolve_queue_len);
883 c->next = mfc_unres_queue;
884 mfc_unres_queue = c;
885
886 ipmr_do_expire_process(1);
887 }
888
889 /*
890 * See if we can append the packet
891 */
892 if (c->mfc_un.unres.unresolved.qlen > 3) {
893 kfree_skb(skb);
894 err = -ENOBUFS;
895 } else {
896 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
897 err = 0;
898 }
899
900 spin_unlock_bh(&mfc_unres_lock);
901 return err;
902}
903
904/*
905 * MFC6 cache manipulation by user space
906 */
907
908static int ip6mr_mfc_delete(struct mf6cctl *mfc)
909{
910 int line;
911 struct mfc6_cache *c, **cp;
912
913 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
914
915 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
916 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
917 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
918 write_lock_bh(&mrt_lock);
919 *cp = c->next;
920 write_unlock_bh(&mrt_lock);
921
922 kmem_cache_free(mrt_cachep, c);
923 return 0;
924 }
925 }
926 return -ENOENT;
927}
928
929static int ip6mr_device_event(struct notifier_block *this,
930 unsigned long event, void *ptr)
931{
932 struct net_device *dev = ptr;
933 struct mif_device *v;
934 int ct;
935
936 if (dev_net(dev) != &init_net)
937 return NOTIFY_DONE;
938
939 if (event != NETDEV_UNREGISTER)
940 return NOTIFY_DONE;
941
942 v = &vif6_table[0];
943 for (ct = 0; ct < maxvif; ct++, v++) {
944 if (v->dev == dev)
945 mif6_delete(ct);
946 }
947 return NOTIFY_DONE;
948}
949
950static struct notifier_block ip6_mr_notifier = {
951 .notifier_call = ip6mr_device_event
952};
953
954/*
955 * Setup for IP multicast routing
956 */
957
623d1a1a 958int __init ip6_mr_init(void)
7bc570c8 959{
623d1a1a
WC
960 int err;
961
7bc570c8
YH
962 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
963 sizeof(struct mfc6_cache),
964 0, SLAB_HWCACHE_ALIGN,
965 NULL);
966 if (!mrt_cachep)
623d1a1a 967 return -ENOMEM;
7bc570c8
YH
968
969 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
623d1a1a
WC
970 err = register_netdevice_notifier(&ip6_mr_notifier);
971 if (err)
972 goto reg_notif_fail;
973#ifdef CONFIG_PROC_FS
974 err = -ENOMEM;
975 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
976 goto proc_vif_fail;
977 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
978 0, &ip6mr_mfc_fops))
979 goto proc_cache_fail;
980#endif
981 return 0;
982reg_notif_fail:
983 kmem_cache_destroy(mrt_cachep);
7bc570c8 984#ifdef CONFIG_PROC_FS
623d1a1a
WC
985proc_vif_fail:
986 unregister_netdevice_notifier(&ip6_mr_notifier);
987proc_cache_fail:
988 proc_net_remove(&init_net, "ip6_mr_vif");
7bc570c8 989#endif
623d1a1a 990 return err;
7bc570c8
YH
991}
992
623d1a1a
WC
993void ip6_mr_cleanup(void)
994{
995#ifdef CONFIG_PROC_FS
996 proc_net_remove(&init_net, "ip6_mr_cache");
997 proc_net_remove(&init_net, "ip6_mr_vif");
998#endif
999 unregister_netdevice_notifier(&ip6_mr_notifier);
1000 del_timer(&ipmr_expire_timer);
1001 kmem_cache_destroy(mrt_cachep);
1002}
7bc570c8
YH
1003
1004static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1005{
1006 int line;
1007 struct mfc6_cache *uc, *c, **cp;
6ac7eb08 1008 unsigned char ttls[MAXMIFS];
7bc570c8
YH
1009 int i;
1010
6ac7eb08
RR
1011 memset(ttls, 255, MAXMIFS);
1012 for (i = 0; i < MAXMIFS; i++) {
7bc570c8
YH
1013 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1014 ttls[i] = 1;
1015
1016 }
1017
1018 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1019
1020 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
1021 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1022 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1023 break;
1024 }
1025
1026 if (c != NULL) {
1027 write_lock_bh(&mrt_lock);
1028 c->mf6c_parent = mfc->mf6cc_parent;
1029 ip6mr_update_thresholds(c, ttls);
1030 if (!mrtsock)
1031 c->mfc_flags |= MFC_STATIC;
1032 write_unlock_bh(&mrt_lock);
1033 return 0;
1034 }
1035
1036 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1037 return -EINVAL;
1038
1039 c = ip6mr_cache_alloc();
1040 if (c == NULL)
1041 return -ENOMEM;
1042
1043 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1044 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1045 c->mf6c_parent = mfc->mf6cc_parent;
1046 ip6mr_update_thresholds(c, ttls);
1047 if (!mrtsock)
1048 c->mfc_flags |= MFC_STATIC;
1049
1050 write_lock_bh(&mrt_lock);
1051 c->next = mfc6_cache_array[line];
1052 mfc6_cache_array[line] = c;
1053 write_unlock_bh(&mrt_lock);
1054
1055 /*
1056 * Check to see if we resolved a queued list. If so we
1057 * need to send on the frames and tidy up.
1058 */
1059 spin_lock_bh(&mfc_unres_lock);
1060 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1061 cp = &uc->next) {
1062 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1063 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1064 *cp = uc->next;
1065 if (atomic_dec_and_test(&cache_resolve_queue_len))
1066 del_timer(&ipmr_expire_timer);
1067 break;
1068 }
1069 }
1070 spin_unlock_bh(&mfc_unres_lock);
1071
1072 if (uc) {
1073 ip6mr_cache_resolve(uc, c);
1074 kmem_cache_free(mrt_cachep, uc);
1075 }
1076 return 0;
1077}
1078
1079/*
1080 * Close the multicast socket, and clear the vif tables etc
1081 */
1082
1083static void mroute_clean_tables(struct sock *sk)
1084{
1085 int i;
1086
1087 /*
1088 * Shut down all active vif entries
1089 */
1090 for (i = 0; i < maxvif; i++) {
1091 if (!(vif6_table[i].flags & VIFF_STATIC))
1092 mif6_delete(i);
1093 }
1094
1095 /*
1096 * Wipe the cache
1097 */
1098 for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
1099 struct mfc6_cache *c, **cp;
1100
1101 cp = &mfc6_cache_array[i];
1102 while ((c = *cp) != NULL) {
1103 if (c->mfc_flags & MFC_STATIC) {
1104 cp = &c->next;
1105 continue;
1106 }
1107 write_lock_bh(&mrt_lock);
1108 *cp = c->next;
1109 write_unlock_bh(&mrt_lock);
1110
1111 kmem_cache_free(mrt_cachep, c);
1112 }
1113 }
1114
1115 if (atomic_read(&cache_resolve_queue_len) != 0) {
1116 struct mfc6_cache *c;
1117
1118 spin_lock_bh(&mfc_unres_lock);
1119 while (mfc_unres_queue != NULL) {
1120 c = mfc_unres_queue;
1121 mfc_unres_queue = c->next;
1122 spin_unlock_bh(&mfc_unres_lock);
1123
1124 ip6mr_destroy_unres(c);
1125
1126 spin_lock_bh(&mfc_unres_lock);
1127 }
1128 spin_unlock_bh(&mfc_unres_lock);
1129 }
1130}
1131
1132static int ip6mr_sk_init(struct sock *sk)
1133{
1134 int err = 0;
1135
1136 rtnl_lock();
1137 write_lock_bh(&mrt_lock);
1138 if (likely(mroute6_socket == NULL))
1139 mroute6_socket = sk;
1140 else
1141 err = -EADDRINUSE;
1142 write_unlock_bh(&mrt_lock);
1143
1144 rtnl_unlock();
1145
1146 return err;
1147}
1148
1149int ip6mr_sk_done(struct sock *sk)
1150{
1151 int err = 0;
1152
1153 rtnl_lock();
1154 if (sk == mroute6_socket) {
1155 write_lock_bh(&mrt_lock);
1156 mroute6_socket = NULL;
1157 write_unlock_bh(&mrt_lock);
1158
1159 mroute_clean_tables(sk);
1160 } else
1161 err = -EACCES;
1162 rtnl_unlock();
1163
1164 return err;
1165}
1166
1167/*
1168 * Socket options and virtual interface manipulation. The whole
1169 * virtual interface system is a complete heap, but unfortunately
1170 * that's how BSD mrouted happens to think. Maybe one day with a proper
1171 * MOSPF/PIM router set up we can clean this up.
1172 */
1173
1174int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1175{
1176 int ret;
1177 struct mif6ctl vif;
1178 struct mf6cctl mfc;
1179 mifi_t mifi;
1180
1181 if (optname != MRT6_INIT) {
1182 if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
1183 return -EACCES;
1184 }
1185
1186 switch (optname) {
1187 case MRT6_INIT:
1188 if (sk->sk_type != SOCK_RAW ||
1189 inet_sk(sk)->num != IPPROTO_ICMPV6)
1190 return -EOPNOTSUPP;
1191 if (optlen < sizeof(int))
1192 return -EINVAL;
1193
1194 return ip6mr_sk_init(sk);
1195
1196 case MRT6_DONE:
1197 return ip6mr_sk_done(sk);
1198
1199 case MRT6_ADD_MIF:
1200 if (optlen < sizeof(vif))
1201 return -EINVAL;
1202 if (copy_from_user(&vif, optval, sizeof(vif)))
1203 return -EFAULT;
6ac7eb08 1204 if (vif.mif6c_mifi >= MAXMIFS)
7bc570c8
YH
1205 return -ENFILE;
1206 rtnl_lock();
1207 ret = mif6_add(&vif, sk == mroute6_socket);
1208 rtnl_unlock();
1209 return ret;
1210
1211 case MRT6_DEL_MIF:
1212 if (optlen < sizeof(mifi_t))
1213 return -EINVAL;
1214 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1215 return -EFAULT;
1216 rtnl_lock();
1217 ret = mif6_delete(mifi);
1218 rtnl_unlock();
1219 return ret;
1220
1221 /*
1222 * Manipulate the forwarding caches. These live
1223 * in a sort of kernel/user symbiosis.
1224 */
1225 case MRT6_ADD_MFC:
1226 case MRT6_DEL_MFC:
1227 if (optlen < sizeof(mfc))
1228 return -EINVAL;
1229 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1230 return -EFAULT;
1231 rtnl_lock();
1232 if (optname == MRT6_DEL_MFC)
1233 ret = ip6mr_mfc_delete(&mfc);
1234 else
1235 ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
1236 rtnl_unlock();
1237 return ret;
1238
14fb64e1
YH
1239 /*
1240 * Control PIM assert (to activate pim will activate assert)
1241 */
1242 case MRT6_ASSERT:
1243 {
1244 int v;
1245 if (get_user(v, (int __user *)optval))
1246 return -EFAULT;
1247 mroute_do_assert = !!v;
1248 return 0;
1249 }
1250
1251#ifdef CONFIG_IPV6_PIMSM_V2
1252 case MRT6_PIM:
1253 {
a9f83bf3 1254 int v;
14fb64e1
YH
1255 if (get_user(v, (int __user *)optval))
1256 return -EFAULT;
1257 v = !!v;
1258 rtnl_lock();
1259 ret = 0;
1260 if (v != mroute_do_pim) {
1261 mroute_do_pim = v;
1262 mroute_do_assert = v;
1263 if (mroute_do_pim)
1264 ret = inet6_add_protocol(&pim6_protocol,
1265 IPPROTO_PIM);
1266 else
1267 ret = inet6_del_protocol(&pim6_protocol,
1268 IPPROTO_PIM);
1269 if (ret < 0)
1270 ret = -EAGAIN;
1271 }
1272 rtnl_unlock();
1273 return ret;
1274 }
1275
1276#endif
7bc570c8 1277 /*
7d120c55 1278 * Spurious command, or MRT6_VERSION which you cannot
7bc570c8
YH
1279 * set.
1280 */
1281 default:
1282 return -ENOPROTOOPT;
1283 }
1284}
1285
1286/*
1287 * Getsock opt support for the multicast routing system.
1288 */
1289
1290int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1291 int __user *optlen)
1292{
1293 int olr;
1294 int val;
1295
1296 switch (optname) {
1297 case MRT6_VERSION:
1298 val = 0x0305;
1299 break;
14fb64e1
YH
1300#ifdef CONFIG_IPV6_PIMSM_V2
1301 case MRT6_PIM:
1302 val = mroute_do_pim;
1303 break;
1304#endif
1305 case MRT6_ASSERT:
1306 val = mroute_do_assert;
1307 break;
7bc570c8
YH
1308 default:
1309 return -ENOPROTOOPT;
1310 }
1311
1312 if (get_user(olr, optlen))
1313 return -EFAULT;
1314
1315 olr = min_t(int, olr, sizeof(int));
1316 if (olr < 0)
1317 return -EINVAL;
1318
1319 if (put_user(olr, optlen))
1320 return -EFAULT;
1321 if (copy_to_user(optval, &val, olr))
1322 return -EFAULT;
1323 return 0;
1324}
1325
1326/*
1327 * The IP multicast ioctl support routines.
1328 */
1329
/*
 * Multicast-routing ioctl handler: exports per-mif and per-(S,G)
 * packet/byte counters to userspace. Returns 0, -EADDRNOTAVAIL when
 * the mif or cache entry does not exist, or another -errno.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		/* Per-interface (mif) counters. */
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &vif6_table[vr.mifi];
		if (MIF_EXISTS(vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			/* Drop the lock before copy_to_user(), which may
			 * fault and sleep. */
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		/* Per-(source, group) forwarding-cache counters. */
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			/* Same ordering as above: unlock before the copy. */
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1380
1381
/* Netfilter FORWARD-hook completion: account the forwarded datagram in
 * the output device's IPv6 MIB, then transmit through the attached dst. */
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}
1387
1388/*
1389 * Processing handlers for ip6mr_forward
1390 */
1391
/*
 * Transmit one copy of a multicast packet on mif @vifi.
 * Consumes @skb on every path (frees it on error, hands it to the
 * netfilter FORWARD hook on success); always returns 0.
 */
static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		/* PIM register vif: no real transmit — account the packet
		 * and report the whole packet to the routing daemon. */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route the copy out of the target mif's link. */
	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	/* Replace the input route with the freshly looked-up output route. */
	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1464
1465static int ip6mr_find_vif(struct net_device *dev)
1466{
1467 int ct;
1468 for (ct = maxvif - 1; ct >= 0; ct--) {
1469 if (vif6_table[ct].dev == dev)
1470 break;
1471 }
1472 return ct;
1473}
1474
/*
 * Forward a multicast packet according to its resolved cache entry.
 * Called with mrt_lock held for reading; consumes @skb (either hands it
 * to ip6mr_forward2() or frees it). Always returns 0.
 */
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			/* Rate-limited per cache entry by MFC_ASSERT_THRESH. */
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	vif6_table[vif].pkt_in++;
	vif6_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			/* Clone for every oif but the last candidate; the
			 * original skb is sent on the final one to save a
			 * copy. */
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
1533
1534
1535/*
1536 * Multicast packets for forwarding arrive here
1537 */
1538
1539int ip6_mr_input(struct sk_buff *skb)
1540{
1541 struct mfc6_cache *cache;
1542
1543 read_lock(&mrt_lock);
1544 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1545
1546 /*
1547 * No usable cache entry
1548 */
1549 if (cache == NULL) {
1550 int vif;
1551
1552 vif = ip6mr_find_vif(skb->dev);
1553 if (vif >= 0) {
1554 int err = ip6mr_cache_unresolved(vif, skb);
1555 read_unlock(&mrt_lock);
1556
1557 return err;
1558 }
1559 read_unlock(&mrt_lock);
1560 kfree_skb(skb);
1561 return -ENODEV;
1562 }
1563
1564 ip6_mr_forward(skb, cache);
1565
1566 read_unlock(&mrt_lock);
1567
1568 return 0;
1569}
1570
1571
1572static int
1573ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1574{
1575 int ct;
1576 struct rtnexthop *nhp;
1577 struct net_device *dev = vif6_table[c->mf6c_parent].dev;
549e028d 1578 u8 *b = skb_tail_pointer(skb);
7bc570c8
YH
1579 struct rtattr *mp_head;
1580
1581 if (dev)
1582 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1583
1584 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1585
1586 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1587 if (c->mfc_un.res.ttls[ct] < 255) {
1588 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1589 goto rtattr_failure;
1590 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1591 nhp->rtnh_flags = 0;
1592 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1593 nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
1594 nhp->rtnh_len = sizeof(*nhp);
1595 }
1596 }
1597 mp_head->rta_type = RTA_MULTIPATH;
549e028d 1598 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
7bc570c8
YH
1599 rtm->rtm_type = RTN_MULTICAST;
1600 return 1;
1601
1602rtattr_failure:
1603 nlmsg_trim(skb, b);
1604 return -EMSGSIZE;
1605}
1606
/*
 * rtnetlink lookup support for multicast routes. If the flow is already
 * resolved, fill the reply via ip6mr_fill_mroute(); otherwise (unless
 * @nowait) fabricate a minimal packet and queue the flow as unresolved.
 * Returns >0 on success, -EAGAIN when @nowait and unresolved, or -errno.
 */
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		/* Build a dummy skb carrying only an IPv6 header so this
		 * (src, dst) pair can be queued for resolution. */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		/* NOTE(review): version 0 presumably marks this header as
		 * locally fabricated rather than a real queued packet —
		 * confirm against the cache-resolve path. */
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1670