/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static int mroute_do_assert;			/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_unres_queue;	/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == init_net.ipv4.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv4.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&init_net.ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > init_net.ipv4.maxvif)
		init_net.ipv4.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

		/*
		 *	Copy the IP header
		 */

		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb->dst = dst_clone(pkt->dst);

		/*
		 *	Add our header
		 */

		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type = msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	if (init_net.ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
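
/*
 * Illustration (editor's sketch, not part of the kernel sources): the
 * userspace side of the upcall above. A daemon that has claimed the
 * mroute socket with MRT_INIT (see ip_mroute_setsockopt below) reads
 * each report as a packet whose IP header area overlays struct igmpmsg;
 * ip->protocol == 0 and im_msgtype distinguish it from real IGMP.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <linux/mroute.h>
 *
 *	static void read_one_upcall(int mroute_fd)
 *	{
 *		char buf[2048];
 *		ssize_t n = recv(mroute_fd, buf, sizeof(buf), 0);
 *
 *		if (n >= (ssize_t)sizeof(struct igmpmsg)) {
 *			struct igmpmsg *msg = (struct igmpmsg *)buf;
 *
 *			if (msg->im_msgtype == IGMPMSG_NOCACHE)
 *				printf("cache miss on vif %d\n", msg->im_vif);
 *		}
 *	}
 */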

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (net_eq(mfc_net(c), &init_net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = init_net.ipv4.mfc_cache_array[line];
	init_net.ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), &init_net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv4.maxvif; i++) {
		if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &init_net.ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), &init_net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == init_net.ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		init_net.ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (init_net.ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			init_net.ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != init_net.ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
		} else {
			ret = vif_delete(vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
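
/*
 * Illustration (editor's sketch, not part of the kernel sources): how a
 * routing daemon typically drives the interface above. The socket must
 * be raw IGMP; MRT_INIT claims it as the one mroute socket, then
 * MRT_ADD_VIF/MRT_ADD_MFC populate the tables. Addresses and vif
 * numbers below are made-up examples.
 *
 *	#include <arpa/inet.h>
 *	#include <sys/socket.h>
 *	#include <linux/mroute.h>
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	struct mfcctl mc = { .mfcc_parent = 0 };
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	mc.mfcc_origin.s_addr = inet_addr("192.0.2.2");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");
 *	mc.mfcc_ttls[1] = 1;	-- forward to vif 1, TTL threshold 1
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */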

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mroute_do_pim;
#endif
	else
		val = mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
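
/*
 * Illustration (editor's sketch, not part of the kernel sources):
 * reading MRT_VERSION (the 0x0305 above) back from userspace on the
 * mroute socket fd from the earlier sketch.
 *
 *	int ver = 0;
 *	socklen_t len = sizeof(ver);
 *
 *	if (getsockopt(fd, IPPROTO_IP, MRT_VERSION, &ver, &len) == 0)
 *		printf("mroute ABI version %#x\n", ver);
 */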

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= init_net.ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(&init_net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
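
/*
 * Illustration (editor's sketch, not part of the kernel sources):
 * querying the per-vif counters kept above via ioctl on the mroute
 * socket.
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *
 *	if (ioctl(fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu pkts in, %lu pkts out\n",
 *		       vr.icount, vr.ocount);
 */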


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &init_net.ipv4.vif_table[0];
	for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(ct, 1);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version = 4;
	iph->tos = old_iph->tos;
	iph->ttl = old_iph->ttl;
	iph->frag_off = 0;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = IPPROTO_IPIP;
	iph->ihl = 5;
	iph->tot_len = htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len + encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicasting
	 * program, it should receive packets no matter what interface
	 * the program joined on.
	 * If we do not make it so, the program will have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = init_net.ipv4.maxvif - 1; ct >= 0; ct--) {
		if (init_net.ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb->rtable->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to run multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	init_net.ipv4.vif_table[vif].pkt_in++;
	init_net.ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations, sort of
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (init_net.ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(init_net.ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = init_net.ipv4.vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb->rtable;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		if (pos-- == 0)
			return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(iter, 0);

	while (++iter->ct < init_net.ipv4.maxvif) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - init_net.ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};

struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = init_net.ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = init_net.ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != init_net.ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = init_net.ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == init_net.ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler = pim_rcv,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}
	return 0;

fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}