/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;	/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

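/*
 * For clarity, a sketch of the pattern that results (drawn from the
 * functions below, not a new interface): the packet path takes mrt_lock
 * shared, while configuration from process context takes it exclusively
 * with bottom halves disabled:
 *
 *	read_lock(&mrt_lock);		// e.g. ip_mr_input() cache lookup
 *	...walk vif_table / mfc_cache_array...
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);	// e.g. vif_add(), ipmr_mfc_add()
 *	...mutate the tables...
 *	write_unlock_bh(&mrt_lock);
 */
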
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, init_net.ipv4.mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == init_net.ipv4.mroute_reg_vif_num)
		init_net.ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == init_net.ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&init_net.ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy? */
	if (VIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (init_net.ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		init_net.ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > init_net.ipv4.maxvif)
		init_net.ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = init_net.ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb->dst = dst_clone(pkt->dst);

		/*
		 *	Add our header
		 */

		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type =
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	if (init_net.ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

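/*
 * For reference, a hedged sketch of the daemon side (an assumption about
 * how mrouted/pimd behaves, not code in this file): the report built
 * above arrives on the MRT_INIT raw socket as a pseudo IGMP packet, and
 * im_mbz == 0 is what distinguishes an upcall from real IGMP.
 * resolve_route() is a hypothetical helper standing in for the daemon's
 * RPF lookup, after which it would issue MRT_ADD_MFC:
 *
 *	char buf[1500];
 *	ssize_t n = recv(mrouter_fd, buf, sizeof(buf), 0);
 *	struct igmpmsg *im = (struct igmpmsg *)buf;
 *	if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0 &&
 *	    im->im_msgtype == IGMPMSG_NOCACHE)
 *		resolve_route(im->im_src, im->im_dst);
 */
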
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (net_eq(mfc_net(c), &init_net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = init_net.ipv4.mfc_cache_array[line];
	init_net.ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), &init_net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv4.maxvif; i++) {
		if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0; i<MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &init_net.ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), &init_net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == init_net.ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		init_net.ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (init_net.ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			init_net.ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != init_net.ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
		} else {
			ret = vif_delete(vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		init_net.ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != init_net.ipv4.mroute_do_pim) {
			init_net.ipv4.mroute_do_pim = v;
			init_net.ipv4.mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (init_net.ipv4.mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

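/*
 * A minimal userspace sketch of the sequence the checks above expect
 * (an informational assumption about the daemon, not kernel code; the
 * address is just an example): open a raw IGMP socket, enable routing
 * with MRT_INIT, then add VIFs and MFC entries:
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 */
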
/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = init_net.ipv4.mroute_do_pim;
#endif
	else
		val = init_net.ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= init_net.ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(&init_net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

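/*
 * Example of reading the per-VIF counters through SIOCGETVIFCNT above
 * (an illustrative daemon-side sketch, not kernel code):
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *	if (ioctl(fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu pkts in, %lu pkts out\n",
 *		       vr.icount, vr.ocount);
 */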

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &init_net.ipv4.vif_table[0];
	for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(ct, 1);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version = 4;
	iph->tos = old_iph->tos;
	iph->ttl = old_iph->ttl;
	iph->frag_off = 0;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = IPPROTO_IPIP;
	iph->ihl = 5;
	iph->tot_len = htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so such packets will
		   disappear into a black hole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear that, if an mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program joined on. If we did not do this, the program would
	 * have to join on all interfaces. On the other hand, a multihoming
	 * host (or router, but not mrouter) cannot join on more than one
	 * interface - it would result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
		if (init_net.ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb->rtable->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) which have a default multicast route pointing
			   to the wrong oif. In any case, it is not a good
			   idea to use multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && init_net.ipv4.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (init_net.ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	init_net.ipv4.vif_table[vif].pkt_in++;
	init_net.ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option in IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (init_net.ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(init_net.ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (init_net.ipv4.mroute_reg_vif_num >= 0)
		reg_dev = init_net.ipv4.vif_table[init_net.ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!init_net.ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb->rtable;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif
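
/*
 * Illustrative (hypothetical) output of /proc/net/ip_mr_vif as produced
 * by ipmr_vif_seq_show() above; the counters and addresses are made up
 * and only show the column layout:
 *
 *	# cat /proc/net/ip_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0              0       0         0       0 00000 0100000A 00000000
 */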

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler	= pim_rcv,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
	return 0;

reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}