net-next-2.6.git: net/ipv4/ipmr.c
netns: ipmr: dynamically allocate mfc_cache_array
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static int mroute_do_assert;			/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_unres_queue;	/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;	/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

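/* Both tunnel helpers (above and below) drive the ipip driver through
 * its ndo_do_ioctl() entry point, which expects a user-space pointer in
 * ifr_ifru.ifru_data.  Since we pass an on-stack kernel buffer instead,
 * the address-space check is temporarily lifted with set_fs(KERNEL_DS)
 * around the call and the old segment restored afterwards.
 */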
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

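/* The "pimreg" device is a software-only vif: anything the stack
 * transmits on it is handed straight to the user-space PIM daemon via
 * reg_vif_xmit() -> ipmr_cache_report(IGMPMSG_WHOLEPKT), never onto a
 * wire.  PIM-SM uses it to emit Register messages towards the RP.
 */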
static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == init_net.ipv4.maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Single timer process for all the unresolved queue. */

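/* Runs in timer (softirq) context.  spin_trylock() keeps us from
 * spinning against a process-context holder of mfc_unres_lock; on
 * contention we simply re-arm the timer and retry shortly after.
 */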
static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > init_net.ipv4.maxvif)
		init_net.ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

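/* Resolved-cache lookup on the data path; callers hold mrt_lock.
 * MFC_HASH() buckets on (group, origin), so the chain walk below only
 * has to compare the two addresses.
 */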
static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type =
	msg->im_msgtype = assert;
	igmp->code = 0;
	ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (init_net.ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

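/* Flow: the first packet for an unknown (S,G) creates an unresolved
 * entry (at most ten may be pending at once) and triggers an
 * IGMPMSG_NOCACHE upcall to mrouted; the per-entry skb queue is capped
 * at four packets.  When the daemon answers with MRT_ADD_MFC,
 * ipmr_cache_resolve() replays the queued skbs through ip_mr_forward().
 */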
static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = init_net.ipv4.mfc_cache_array[line];
	init_net.ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv4.maxvif; i++) {
		if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &init_net.ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == init_net.ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		init_net.ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

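/*
 * Daemon-side view (editorial sketch, not kernel code): a routing
 * daemon such as mrouted typically drives this interface roughly as
 * follows; the addresses and vif numbers are made-up examples.
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	struct mfcctl mc = { .mfcc_parent = 0 };
 *	mc.mfcc_origin.s_addr   = inet_addr("192.0.2.2");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("224.1.1.1");
 *	mc.mfcc_ttls[1] = 1;	   (forward to vif 1)
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 * MRT_INIT must come first and marks this socket as mroute_sk; closing
 * it (or MRT_DONE) tears everything down via mrtsock_destruct().
 */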
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (init_net.ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			init_net.ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != init_net.ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
		} else {
			ret = vif_delete(vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mroute_do_pim;
#endif
	else
		val = mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= init_net.ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(&init_net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &init_net.ipv4.vif_table[0];
	for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(ct, 1);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

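/* Resulting layout, built in place in front of the original datagram:
 *
 *	+----------------------+-----------------+---------+
 *	| outer IP, proto IPIP | original IP hdr | payload |
 *	+----------------------+-----------------+---------+
 *
 * TOS and TTL are copied from the inner header; the identification
 * field is chosen via ip_select_ident() and the checksum recomputed.
 */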
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
		if (init_net.ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

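/* Transmission strategy below: while scanning the oif list we clone the
 * skb for every vif except the last eligible one, which consumes the
 * original (unless @local asked us to keep it for local delivery).
 * This saves one copy on the common single-oif path.
 */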
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb->rtable->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   sent through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	init_net.ipv4.vif_table[vif].pkt_in++;
	init_net.ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}

/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (init_net.ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(init_net.ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
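/* Common tail of the PIMv1/PIMv2 receive paths: validate the inner
 * packet, then strip the register header and re-inject the bare
 * multicast datagram on the pimreg device via netif_rx(), as if it had
 * arrived there.  Returns non-zero if the caller should drop the skb.
 */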
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = init_net.ipv4.vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb->rtable;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		if (pos-- == 0)
			return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(iter, 0);

	while (++iter->ct < init_net.ipv4.maxvif) {
		if (!VIF_EXISTS(&init_net, iter->ct))
			continue;
		return &init_net.ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - init_net.ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = init_net.ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = init_net.ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != init_net.ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = init_net.ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == init_net.ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
#endif

/*
 *	Setup for IP multicast routing
 */
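/* Per-namespace state: both the vif table and (per this commit) the
 * MFC hash array are allocated dynamically for each struct net instead
 * of living in static arrays, so every network namespace gets its own
 * multicast routing tables.
 */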
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}
	return 0;

fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1da177e4 2000}