/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

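/*
 * A minimal illustrative sketch (not new code in this file) of the
 * locking pattern the comment above describes: resolved-cache updates
 * take the writer side of mrt_lock in process context, the forwarding
 * path takes only the reader side, and the unresolved queue is always
 * guarded by the stronger mfc_unres_lock.
 *
 *	write_lock_bh(&mrt_lock);	(add/remove a resolved entry)
 *	write_unlock_bh(&mrt_lock);
 *
 *	read_lock(&mrt_lock);		(per-packet lookup and forward)
 *	read_unlock(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);	(touch the unresolved queue)
 *	spin_unlock_bh(&mfc_unres_lock);
 */
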
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == init_net.ipv4.maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > init_net.ipv4.maxvif)
		init_net.ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

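/*
 * Illustrative daemon-side counterpart (an assumption, not kernel
 * code: mroute_fd is the MRT_INIT socket): a plain, non-tunnel vif
 * bound to a local address is added with MRT_ADD_VIF roughly so:
 *
 *	struct vifctl vc;
 *
 *	memset(&vc, 0, sizeof(vc));
 *	vc.vifc_vifi = 0;
 *	vc.vifc_flags = 0;
 *	vc.vifc_threshold = 1;
 *	vc.vifc_lcl_addr.s_addr = inet_addr("10.0.0.254");
 *	setsockopt(mroute_fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 */
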
static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

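/*
 * Illustrative userspace sketch (not part of the kernel): the daemon
 * side of this scheme reads a struct igmpmsg from its raw IGMP socket;
 * resolve_and_add_mfc() is a hypothetical daemon helper.
 *
 *	char buf[2048];
 *	ssize_t n = read(mroute_fd, buf, sizeof(buf));
 *	struct igmpmsg *im = (struct igmpmsg *)buf;
 *
 *	if (n > 0 && im->im_msgtype == IGMPMSG_NOCACHE)
 *		resolve_and_add_mfc(im->im_src, im->im_dst, im->im_vif);
 */
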
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (init_net.ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

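/*
 * Illustrative daemon-side sketch (assumptions: mroute_fd is the
 * MRT_INIT socket and vifs 0 and 1 were added with MRT_ADD_VIF): a
 * (S,G) entry forwarding from vif 0 out of vif 1 is installed as
 *
 *	struct mfcctl mc;
 *
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mfcc_origin.s_addr   = inet_addr("10.0.0.1");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");
 *	mc.mfcc_parent          = 0;
 *	mc.mfcc_ttls[1]         = 1;
 *	setsockopt(mroute_fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */
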
static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &mfc_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &mfc_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv4.maxvif; i++) {
		if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == init_net.ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		init_net.ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

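/*
 * Illustrative userspace sketch (not part of the kernel, error
 * handling omitted) of the socket lifecycle this interface expects:
 *
 *	int one = 1;
 *	int mroute_fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *
 *	setsockopt(mroute_fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	... MRT_ADD_VIF / MRT_ADD_MFC calls, read() for upcalls ...
 *	setsockopt(mroute_fd, IPPROTO_IP, MRT_DONE, &one, sizeof(one));
 */
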
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (init_net.ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			init_net.ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != init_net.ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
		} else {
			ret = vif_delete(vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mroute_do_pim;
#endif
	else
		val = mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

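/*
 * Illustrative sketch (assumption: mroute_fd is the MRT_INIT socket):
 * per-vif counters are fetched from userspace with SIOCGETVIFCNT.
 *
 *	struct sioc_vif_req vr;
 *
 *	vr.vifi = 0;
 *	if (ioctl(mroute_fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu pkts in, %lu pkts out\n",
 *		       vr.icount, vr.ocount);
 */
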
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= init_net.ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(&init_net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &init_net.ipv4.vif_table[0];
	for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(ct, 1);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

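/*
 * Resulting wire layout after ip_encap() (illustrative diagram only):
 *
 *	+-----------------+------------------+-------------
 *	| outer IP (20 B) | inner IP header  | payload ...
 *	| proto = IPIP    | original datagram|
 *	+-----------------+------------------+-------------
 */
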
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding
	 * on all output interfaces: if the mrouter runs a multicast
	 * application, that application should receive packets regardless
	 * of which interface it joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router,
	 * but not an mrouter) cannot join on more than one interface -
	 * it would result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
		if (init_net.ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb->rtable->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) which have a default multicast route
			   pointing to the wrong oif. In any case, it is
			   not a good idea to use multicast applications
			   on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	init_net.ipv4.vif_table[vif].pkt_in++;
	init_net.ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (init_net.ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(init_net.ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = init_net.ipv4.vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb->rtable;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
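/*
 * Illustrative sample of the vif table as read back from userspace;
 * the row values below are made up:
 *
 *	$ cat /proc/net/ip_mr_vif
 *	Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote
 *	 0 eth0      1500     10     3000      20 00000 0100000A 00000000
 */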
struct ipmr_vif_iter {
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		if (pos-- == 0)
			return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(iter, 0);

	while (++iter->ct < init_net.ipv4.maxvif) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - init_net.ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}