/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>

struct sock *mroute6_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that changes are serialized under rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];	/* Devices */
static int maxvif;

#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)

static int mroute_do_assert;	/* Set in PIM assert */
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC_LINES];	/* Forwarding cache */

static struct mfc6_cache *mfc_unres_queue;	/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;	/* Size of unresolved */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We revert to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		return &vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
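
/*
 * Illustrative only (all values invented): with one physical mif and one
 * register mif configured, reading /proc/net/ip6_mr_vif might show
 * something like:
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           9540      82     12080     104 00000
 *	 1 pim6reg           0       0      1120      10 00008
 */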

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next = ip6mr_vif_seq_next,
	.stop = ip6mr_vif_seq_stop,
	.show = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip6mr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq,
			   NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
			   NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
			   mfc->mf6c_parent,
			   mfc->mfc_un.res.pkt,
			   mfc->mfc_un.res.bytes,
			   mfc->mfc_un.res.wrong_if);

		if (it->cache != &mfc_unres_queue) {
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
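
/*
 * Illustrative only (addresses, counters and spacing invented): a resolved
 * /proc/net/ip6_mr_cache entry lists the oifs as "mif:TTL-threshold"
 * pairs after the counters, e.g.
 *
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff0e:0000:...:0001 2001:0db8:...:0001 0    42     4200      0  1:1   2:1
 */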

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next = ipmr_mfc_seq_next,
	.stop = ipmr_mfc_seq_stop,
	.show = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;

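/*
 * A sketch of the PIM Register message that pim6_rcv() below parses; the
 * layout follows from the pskb_may_pull() check and pointer arithmetic in
 * the function (see RFC 4601 for the on-the-wire format):
 *
 *	+---------------------------+  <- skb_transport_header(skb)
 *	| struct pimreghdr          |     type/flags/checksum
 *	+---------------------------+  <- encap
 *	| inner struct ipv6hdr      |     must be multicast-destined
 *	| inner payload ...         |
 *	+---------------------------+
 */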
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check that the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Re-inject the decapsulated packet on the register device;
	   the inner packet is IPv6, so mark it ETH_P_IPV6. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len;
	((struct net_device_stats *)netdev_priv(dev))->tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats *)netdev_priv(dev);
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* Ethernet MTU minus the IPv6 header and the 8-byte PIM register header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->destructor		= free_netdev;
}

static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;
	struct inet6_dev *in_dev;

	dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg",
			   reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	in_dev = ipv6_find_idev(dev);
	if (!in_dev)
		goto failure;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;
	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}

/* Single timer process for all the unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	/* If the lock is contended, retry shortly rather than
	   spinning in timer context. */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < maxvif; vifi++) {
		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

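/*
 * Worked example (hypothetical values): with existing mifs 0-3 and
 * ttls = { 1, 0, 3, 255 }, mifs 0 and 2 become oifs with TTL thresholds
 * 1 and 3; minvif ends up 0 and maxvif ends up 3, so the forwarding
 * loop in ip6_mr_forward() scans exactly mifs 0..2.
 */
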
static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &vif6_table[vifi];
	struct net_device *dev;

	/* Is the vif busy? */
	if (MIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	dev_set_allmulti(dev, 1);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > maxvif)
		maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb->tail - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = vifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (mroute6_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

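/*
 * Sketch of the daemon side (hypothetical userspace code; resolve_route()
 * stands for daemon-defined logic): reports queued above arrive as
 * ordinary datagrams on the MRT6_INIT'ed raw ICMPv6 socket, with a
 * struct mrt6msg in place of the IPv6 header:
 *
 *	char buf[8192];
 *	ssize_t n = read(s, buf, sizeof(buf));
 *	struct mrt6msg *m = (struct mrt6msg *)buf;
 *
 *	if (n >= (ssize_t)sizeof(*m) && m->im6_mbz == 0 &&
 *	    m->im6_msgtype == MRT6MSG_NOCACHE)
 *		resolve_route(&m->im6_src, &m->im6_dst);  // hypothetical
 */
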
/*
 *	Queue a packet for resolution, creating an unresolved cache entry
 *	if one does not exist yet.
 */

static int
ip6mr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, vifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (dev_net(dev) != &init_net)
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &vif6_table[0];
	for (ct = 0; ct < maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IPv6 multicast routing
 */

void __init ip6_mr_init(void)
{
	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		panic("cannot allocate ip6_mrt_cache");

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	register_netdevice_notifier(&ip6_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
	proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
#endif
}

static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < maxvif; i++) {
		if (!(vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mroute6_socket == NULL))
		mroute6_socket = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == mroute6_socket) {
		write_lock_bh(&mrt_lock);
		mroute6_socket = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

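/*
 * Illustrative userspace sketch (hypothetical; error handling omitted):
 * a daemon such as pim6sd becomes the mroute socket and adds a mif
 * roughly like this. The option level is IPPROTO_IPV6, and the socket
 * must be a raw ICMPv6 socket, as enforced by MRT6_INIT below:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *
 *	struct mif6ctl mif = {
 *		.mif6c_mifi	= 0,
 *		.mif6c_flags	= 0,
 *		.mif6c_pifi	= if_nametoindex("eth0"),
 *	};
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 */
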
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v, ret;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION, which you cannot set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

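/*
 * Continuing the sketch above (hypothetical userspace code): once the
 * daemon has resolved a source/group pair it installs a forwarding cache
 * entry, selecting the outgoing mifs with IF_SET():
 *
 *	struct mf6cctl mf;
 *	memset(&mf, 0, sizeof(mf));
 *	mf.mf6cc_origin.sin6_addr   = src;	// e.g. from MRT6MSG_NOCACHE
 *	mf.mf6cc_mcastgrp.sin6_addr = grp;
 *	mf.mf6cc_parent = 0;			// incoming mif
 *	IF_SET(1, &mf.mf6cc_ifset);		// forward out of mif 1
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mf, sizeof(mf));
 */
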
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &vif6_table[vr.mifi];
		if (MIF_EXISTS(vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

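/*
 * Hypothetical userspace sketch: reading the per-flow counters that
 * SIOCGETSGCNT_IN6 exposes (fields as filled in above):
 *
 *	struct sioc_sg_req6 sr;
 *	memset(&sr, 0, sizeof(sr));
 *	sr.src.sin6_addr = src;
 *	sr.grp.sin6_addr = grp;
 *	if (ioctl(s, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("%lu pkts, %lu bytes, %lu wrong-if\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 */
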
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	/* XXX stats */
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len;
		((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program has joined on. If we did not do this, the program
	 * would have to join on all interfaces. On the other hand, a
	 * multihomed host (or router, but not mrouter) cannot join on more
	 * than one interface - that would result in receiving multiple
	 * copies of each packet.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = maxvif - 1; ct >= 0; ct--) {
		if (vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to
		       the SPT, so we cannot check that the packet arrived
		       on an oif. That is bad, but otherwise we would need
		       to move a pretty large chunk of pimd into the
		       kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	vif6_table[vif].pkt_in++;
	vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame: clone it for every oif whose TTL
	 *	threshold the packet's hop limit exceeds, and send the
	 *	original on the last such oif.
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}

/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
	u8 *b = skb->tail;
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb->tail - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Fabricate a version-0 pseudo header, so that the
		   queued skb can be recognised as a netlink request
		   once the entry resolves (see ip6mr_cache_resolve). */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}