/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@redhat.com>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
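
/* Concretely: the packet path only ever takes read_lock(&mrt_lock), while
 * configuration changes (vif_add, vif_delete, ipmr_mfc_add/_delete) run in
 * process context under rtnl_lock and take write_lock_bh(&mrt_lock) just
 * around the pointer updates themselves.
 */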

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name("tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t oldfs;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (void*)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
				goto failure;
			in_dev->cnf.rp_filter = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
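
/* Note the get_fs()/set_fs(KERNEL_DS) dance above: the tunnel driver's
 * do_ioctl() expects a user-space pointer in ifr_ifru.ifru_data, so the
 * address limit is temporarily widened to let it read the ip_tunnel_parm
 * from kernel stack memory.
 */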

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats*)netdev_priv(dev);
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
			   reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if ((in_dev = inetdev_init(dev)) == NULL)
		goto failure;

	in_dev->cnf.rp_filter = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		in_dev->cnf.mc_forwarding--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
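
/* The expiry handler deliberately uses spin_trylock(): if mfc_unres_lock is
 * busy it re-arms itself HZ/10 into the future rather than spinning in timer
 * context, and it leaves the timer stopped once the queue drains (it is
 * re-armed from ipmr_cache_unresolved()).
 */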

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
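
/* Worked example: with maxvif == 4 and ttls == {0, 2, 255, 3}, only vifs 1
 * and 3 get a real threshold (0 and 255 both mean "do not forward"), so
 * minvif becomes 1 and maxvif becomes 4; ip_mr_forward() then scans just
 * the half-open range [minvif, maxvif).
 */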

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
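
/* A minimal userspace sketch of driving vif_add() (hypothetical address;
 * "fd" is assumed to be the raw IGMP socket that issued MRT_INIT):
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_flags = 0,
 *			     .vifc_threshold = 1, .vifc_rate_limit = 0 };
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 * With vifc_flags == 0 this takes the ip_dev_find() branch above.
 */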

static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *c;

	for (c=mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c==NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c==NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb->tail - (u8*)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb->h.raw = skb->data;
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
	memcpy(skb->data,pkt->data,ihl);
	skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg*)skb->nh.iph;
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	skb->nh.iph->tot_len=htons(skb->len);		/* Fix the length */
	skb->h.raw = skb->nh.raw;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
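
/* The upcall skb handed to mrouted reuses the copied IP header as a
 * struct igmpmsg: im_msgtype and im_mbz overlay the TTL and protocol bytes
 * (which is why protocol is zeroed as the "route add" flag above) and
 * im_src/im_dst line up with saddr/daddr. mrouted typically answers an
 * IGMPMSG_NOCACHE report with an MRT_ADD_MFC setsockopt.
 */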

/*
 *	Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
		    c->mfc_origin == skb->nh.iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent=-1;
		c->mfc_origin=skb->nh.iph->saddr;
		c->mfc_mcastgrp=skb->nh.iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
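
/* Two hard limits bound the unresolved state: at most 10 pending cache
 * entries in total (cache_resolve_queue_len) and, via the qlen > 3 check,
 * no more than four queued skbs per entry, so a burst of unroutable groups
 * cannot pin unbounded memory while mrouted is slow to answer.
 */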

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
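
/* A minimal userspace sketch of installing an (S,G) entry via the code
 * above (hypothetical addresses; vif indices refer to vifs added earlier):
 *
 *	struct mfcctl mc = { .mfcc_parent = 0 };
 *	mc.mfcc_origin.s_addr   = inet_addr("192.0.2.7");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.2.3");
 *	mc.mfcc_ttls[1] = 1;		// forward on vif 1
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */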

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		ipv4_devconf.mc_forwarding--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen!=sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mroute_socket) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mroute_socket=sk;
			write_unlock_bh(&mrt_lock);

			ipv4_devconf.mc_forwarding++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk!=mroute_socket)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen!=sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif,optval,sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname==MRT_ADD_VIF) {
			ret = vif_add(&vif, sk==mroute_socket);
		} else {
			ret = vif_delete(vif.vifc_vifi);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen!=sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc,optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname==MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		mroute_do_assert=(v)?1:0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v, ret;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		v = (v)?1:0;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
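
/* Control-socket lifecycle, sketched the way mrouted/pimd use it (a hedged
 * outline, not lifted from either daemon): open a raw IGMP socket, issue
 * MRT_INIT, add vifs and MFC entries, and let close() (or MRT_DONE) tear
 * everything down through mrtsock_destruct():
 *
 *	int one = 1;
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	... MRT_ADD_VIF / MRT_ADD_MFC, read() kernel upcalls ...
 *	close(fd);
 */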

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
{
	int olr;
	int val;

	if (optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname!=MRT_PIM &&
#endif
	   optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr,optlen))
		return -EFAULT;
	if (optname==MRT_VERSION)
		val=0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname==MRT_PIM)
		val=mroute_do_pim;
#endif
	else
		val=mroute_do_assert;
	if (copy_to_user(optval,&val,olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr,arg,sizeof(vr)))
			return -EFAULT;
		if (vr.vifi>=maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif=&vif_table[vr.vifi];
		if (VIF_EXISTS(vr.vifi)) {
			vr.icount=vif->pkt_in;
			vr.ocount=vif->pkt_out;
			vr.ibytes=vif->bytes_in;
			vr.obytes=vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&vr,sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr,arg,sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&sr,sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
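
/* Counter readout sketch (field names as in struct sioc_vif_req; "fd" is
 * assumed to be a raw IGMP socket):
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *	if (ioctl(fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("in %lu / out %lu pkts\n", vr.icount, vr.ocount);
 *
 * SIOCGETSGCNT works the same way with a struct sioc_sg_req keyed by
 * (src, grp).
 */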


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct vif_device *v;
	int ct;
	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v=&vif_table[0];
	for (ct=0;ct<maxvif;ct++,v++) {
		if (v->dev==ptr)
			vif_delete(ct);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier={
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = skb->nh.iph;

	skb_push(skb, sizeof(struct iphdr));
	skb->h.ipiph = skb->nh.iph;
	skb_reset_network_header(skb);
	iph = skb->nh.iph;

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}
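
/* After ip_encap() the buffer carries two IP headers: a fresh outer header
 * (protocol IPPROTO_IPIP, addresses from the vif's tunnel endpoints,
 * TOS/TTL copied from the inner packet) sits directly in front of the
 * untouched multicast datagram, and skb->h.ipiph still points at the
 * inner header.
 */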

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options * opt	= &(IPCB(skb)->opt);

	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct iphdr *iph = skb->nh.iph;
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	iph = skb->nh.iph;
	ip_decrease_ttl(iph);

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets regardless of the interface
	 * on which the program has joined.
	 * If we do not do this, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct=maxvif-1; ct>=0; ct--) {
		if (vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (((struct rtable*)skb->dst)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   sent through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend=ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
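
/* The psend bookkeeping above is a copy-avoidance trick: every eligible
 * vif except the last transmits a clone, and the final one consumes the
 * original skb, unless "local" delivery still needs it, in which case the
 * last transmit also uses a clone.
 */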


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (skb->nh.iph->protocol == IPPROTO_IGMP){
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mroute_socket) {
				nf_reset(skb);
				raw_rcv(mroute_socket, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
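
/* Note the ordering for locally destined packets: the forwarding path is
 * always fed clones while the original skb ends up in ip_local_deliver(),
 * matching the RFC 1584 behaviour restated above ipmr_queue_xmit()'s
 * NF_HOOK: a multicast router delivers locally no matter which interface
 * the datagram arrived on.
 */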

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct igmphdr*)skb->h.raw;

	if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr*)skb->h.raw;
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
	if (!MULTICAST(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif_table[c->mfc_parent].dev;
	u8 *b = skb->tail;
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb->tail - (u8*)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -EMSGSIZE;
}
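
/* The oif list is encoded as a single RTA_MULTIPATH attribute: one
 * rtnexthop per vif whose threshold is below 255, with the TTL threshold
 * exported in rtnh_hops. ipmr_get_route() relies on this when answering
 * RTM_GETROUTE queries for multicast destinations.
 */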

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = (struct rtable*)skb->dst;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache==NULL) {
		struct sk_buff *skb2;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
		skb2->nh.iph->saddr = rt->rt_src;
		skb2->nh.iph->daddr = rt->rt_dst;
		skb2->nh.iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!VIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!VIF_EXISTS(iter->ct))
			continue;
		return &vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	int rc = -ENOMEM;
	struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		goto out;

	rc = seq_open(file, &ipmr_vif_seq_ops);
	if (rc)
		goto out_kfree;

	s->ct = 0;
	seq = file->private_data;
	seq->private = s;
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;

}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent,
			   mfc->mfc_un.res.pkt,
			   mfc->mfc_un.res.bytes,
			   mfc->mfc_un.res.wrong_if);

		if (it->cache != &mfc_unres_queue) {
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++ ) {
				if (VIF_EXISTS(n)
				   && mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	int rc = -ENOMEM;
	struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		goto out;

	rc = seq_open(file, &ipmr_mfc_seq_ops);
	if (rc)
		goto out_kfree;

	seq = file->private_data;
	seq->private = s;
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;

}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
#endif


/*
 *	Setup for IP multicast routing
 */

void __init ip_mr_init(void)
{
	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL, NULL);
	init_timer(&ipmr_expire_timer);
	ipmr_expire_timer.function=ipmr_expire_process;
	register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
	proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
#endif
}