]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipmr.c
ipv4: update ARPD help text
[net-next-2.6.git] / net / ipv4 / ipmr.c
CommitLineData
1da177e4
LT
1/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
113aa838 4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
1da177e4
LT
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
1da177e4
LT
29#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
4fc268d2 32#include <linux/capability.h>
1da177e4
LT
33#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
46f25dff 49#include <linux/if_ether.h>
457c4cbc 50#include <net/net_namespace.h>
1da177e4
LT
51#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
14c85021 54#include <net/route.h>
1da177e4
LT
55#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
dc5fc579 64#include <net/netlink.h>
1da177e4
LT
65
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
1da177e4
LT
70/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74static DEFINE_RWLOCK(mrt_lock);
75
76/*
77 * Multicast router control variables
78 */
79
cf958ae3 80#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
1da177e4 81
1da177e4 82static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
1da177e4
LT
83
84/* Special spinlock for queue of unresolved entries */
85static DEFINE_SPINLOCK(mfc_unres_lock);
86
87/* We return to original Alan's scheme. Hash table of resolved
88 entries is changed only in process context and protected
89 with weak lock mrt_lock. Queue of unresolved entries is protected
90 with strong spinlock mfc_unres_lock.
91
92 In this case data path is free of exclusive locks at all.
93 */
94
e18b890b 95static struct kmem_cache *mrt_cachep __read_mostly;
1da177e4
LT
96
97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
4feb88e5
BT
98static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
1da177e4
LT
100static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102#ifdef CONFIG_IP_PIMSM_V2
103static struct net_protocol pim_protocol;
104#endif
105
106static struct timer_list ipmr_expire_timer;
107
108/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
109
d607032d
WC
110static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
111{
4feb88e5
BT
112 struct net *net = dev_net(dev);
113
d607032d
WC
114 dev_close(dev);
115
4feb88e5 116 dev = __dev_get_by_name(net, "tunl0");
d607032d 117 if (dev) {
5bc3eb7e 118 const struct net_device_ops *ops = dev->netdev_ops;
d607032d 119 struct ifreq ifr;
d607032d
WC
120 struct ip_tunnel_parm p;
121
122 memset(&p, 0, sizeof(p));
123 p.iph.daddr = v->vifc_rmt_addr.s_addr;
124 p.iph.saddr = v->vifc_lcl_addr.s_addr;
125 p.iph.version = 4;
126 p.iph.ihl = 5;
127 p.iph.protocol = IPPROTO_IPIP;
128 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
129 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
130
5bc3eb7e
SH
131 if (ops->ndo_do_ioctl) {
132 mm_segment_t oldfs = get_fs();
133
134 set_fs(KERNEL_DS);
135 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
136 set_fs(oldfs);
137 }
d607032d
WC
138 }
139}
140
1da177e4 141static
4feb88e5 142struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
1da177e4
LT
143{
144 struct net_device *dev;
145
4feb88e5 146 dev = __dev_get_by_name(net, "tunl0");
1da177e4
LT
147
148 if (dev) {
5bc3eb7e 149 const struct net_device_ops *ops = dev->netdev_ops;
1da177e4
LT
150 int err;
151 struct ifreq ifr;
1da177e4
LT
152 struct ip_tunnel_parm p;
153 struct in_device *in_dev;
154
155 memset(&p, 0, sizeof(p));
156 p.iph.daddr = v->vifc_rmt_addr.s_addr;
157 p.iph.saddr = v->vifc_lcl_addr.s_addr;
158 p.iph.version = 4;
159 p.iph.ihl = 5;
160 p.iph.protocol = IPPROTO_IPIP;
161 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
ba93ef74 162 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
1da177e4 163
5bc3eb7e
SH
164 if (ops->ndo_do_ioctl) {
165 mm_segment_t oldfs = get_fs();
166
167 set_fs(KERNEL_DS);
168 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
169 set_fs(oldfs);
170 } else
171 err = -EOPNOTSUPP;
1da177e4
LT
172
173 dev = NULL;
174
4feb88e5
BT
175 if (err == 0 &&
176 (dev = __dev_get_by_name(net, p.name)) != NULL) {
1da177e4
LT
177 dev->flags |= IFF_MULTICAST;
178
e5ed6399 179 in_dev = __in_dev_get_rtnl(dev);
71e27da9 180 if (in_dev == NULL)
1da177e4 181 goto failure;
71e27da9
HX
182
183 ipv4_devconf_setall(in_dev);
184 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
1da177e4
LT
185
186 if (dev_open(dev))
187 goto failure;
7dc00c82 188 dev_hold(dev);
1da177e4
LT
189 }
190 }
191 return dev;
192
193failure:
194 /* allow the register to be completed before unregistering. */
195 rtnl_unlock();
196 rtnl_lock();
197
198 unregister_netdevice(dev);
199 return NULL;
200}
201
202#ifdef CONFIG_IP_PIMSM
203
1da177e4
LT
204static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205{
4feb88e5
BT
206 struct net *net = dev_net(dev);
207
1da177e4 208 read_lock(&mrt_lock);
cf3677ae
PE
209 dev->stats.tx_bytes += skb->len;
210 dev->stats.tx_packets++;
4feb88e5
BT
211 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212 IGMPMSG_WHOLEPKT);
1da177e4
LT
213 read_unlock(&mrt_lock);
214 kfree_skb(skb);
215 return 0;
216}
217
007c3838
SH
218static const struct net_device_ops reg_vif_netdev_ops = {
219 .ndo_start_xmit = reg_vif_xmit,
220};
221
1da177e4
LT
222static void reg_vif_setup(struct net_device *dev)
223{
224 dev->type = ARPHRD_PIMREG;
46f25dff 225 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
1da177e4 226 dev->flags = IFF_NOARP;
007c3838 227 dev->netdev_ops = &reg_vif_netdev_ops,
1da177e4
LT
228 dev->destructor = free_netdev;
229}
230
231static struct net_device *ipmr_reg_vif(void)
232{
233 struct net_device *dev;
234 struct in_device *in_dev;
235
cf3677ae 236 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
1da177e4
LT
237
238 if (dev == NULL)
239 return NULL;
240
241 if (register_netdevice(dev)) {
242 free_netdev(dev);
243 return NULL;
244 }
245 dev->iflink = 0;
246
71e27da9
HX
247 rcu_read_lock();
248 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
249 rcu_read_unlock();
1da177e4 250 goto failure;
71e27da9 251 }
1da177e4 252
71e27da9
HX
253 ipv4_devconf_setall(in_dev);
254 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
255 rcu_read_unlock();
1da177e4
LT
256
257 if (dev_open(dev))
258 goto failure;
259
7dc00c82
WC
260 dev_hold(dev);
261
1da177e4
LT
262 return dev;
263
264failure:
265 /* allow the register to be completed before unregistering. */
266 rtnl_unlock();
267 rtnl_lock();
268
269 unregister_netdevice(dev);
270 return NULL;
271}
272#endif
273
274/*
275 * Delete a VIF entry
7dc00c82 276 * @notify: Set to 1, if the caller is a notifier_call
1da177e4 277 */
e905a9ed 278
4feb88e5 279static int vif_delete(struct net *net, int vifi, int notify)
1da177e4
LT
280{
281 struct vif_device *v;
282 struct net_device *dev;
283 struct in_device *in_dev;
284
4feb88e5 285 if (vifi < 0 || vifi >= net->ipv4.maxvif)
1da177e4
LT
286 return -EADDRNOTAVAIL;
287
4feb88e5 288 v = &net->ipv4.vif_table[vifi];
1da177e4
LT
289
290 write_lock_bh(&mrt_lock);
291 dev = v->dev;
292 v->dev = NULL;
293
294 if (!dev) {
295 write_unlock_bh(&mrt_lock);
296 return -EADDRNOTAVAIL;
297 }
298
299#ifdef CONFIG_IP_PIMSM
4feb88e5
BT
300 if (vifi == net->ipv4.mroute_reg_vif_num)
301 net->ipv4.mroute_reg_vif_num = -1;
1da177e4
LT
302#endif
303
4feb88e5 304 if (vifi+1 == net->ipv4.maxvif) {
1da177e4
LT
305 int tmp;
306 for (tmp=vifi-1; tmp>=0; tmp--) {
4feb88e5 307 if (VIF_EXISTS(net, tmp))
1da177e4
LT
308 break;
309 }
4feb88e5 310 net->ipv4.maxvif = tmp+1;
1da177e4
LT
311 }
312
313 write_unlock_bh(&mrt_lock);
314
315 dev_set_allmulti(dev, -1);
316
e5ed6399 317 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
42f811b8 318 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
1da177e4
LT
319 ip_rt_multicast_event(in_dev);
320 }
321
7dc00c82 322 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
1da177e4
LT
323 unregister_netdevice(dev);
324
325 dev_put(dev);
326 return 0;
327}
328
5c0a66f5
BT
329static inline void ipmr_cache_free(struct mfc_cache *c)
330{
331 release_net(mfc_net(c));
332 kmem_cache_free(mrt_cachep, c);
333}
334
1da177e4
LT
335/* Destroy an unresolved cache entry, killing queued skbs
336 and reporting error to netlink readers.
337 */
338
339static void ipmr_destroy_unres(struct mfc_cache *c)
340{
341 struct sk_buff *skb;
9ef1d4c7 342 struct nlmsgerr *e;
4feb88e5 343 struct net *net = mfc_net(c);
1da177e4 344
4feb88e5 345 atomic_dec(&net->ipv4.cache_resolve_queue_len);
1da177e4 346
c354e124 347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
eddc9ec5 348 if (ip_hdr(skb)->version == 0) {
1da177e4
LT
349 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350 nlh->nlmsg_type = NLMSG_ERROR;
351 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352 skb_trim(skb, nlh->nlmsg_len);
9ef1d4c7
PM
353 e = NLMSG_DATA(nlh);
354 e->error = -ETIMEDOUT;
355 memset(&e->msg, 0, sizeof(e->msg));
2942e900 356
4feb88e5 357 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1da177e4
LT
358 } else
359 kfree_skb(skb);
360 }
361
5c0a66f5 362 ipmr_cache_free(c);
1da177e4
LT
363}
364
365
366/* Single timer process for all the unresolved queue. */
367
368static void ipmr_expire_process(unsigned long dummy)
369{
370 unsigned long now;
371 unsigned long expires;
372 struct mfc_cache *c, **cp;
373
374 if (!spin_trylock(&mfc_unres_lock)) {
375 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376 return;
377 }
378
1e8fb3b6 379 if (mfc_unres_queue == NULL)
1da177e4
LT
380 goto out;
381
382 now = jiffies;
383 expires = 10*HZ;
384 cp = &mfc_unres_queue;
385
386 while ((c=*cp) != NULL) {
387 if (time_after(c->mfc_un.unres.expires, now)) {
388 unsigned long interval = c->mfc_un.unres.expires - now;
389 if (interval < expires)
390 expires = interval;
391 cp = &c->next;
392 continue;
393 }
394
395 *cp = c->next;
396
397 ipmr_destroy_unres(c);
398 }
399
1e8fb3b6 400 if (mfc_unres_queue != NULL)
1da177e4
LT
401 mod_timer(&ipmr_expire_timer, jiffies + expires);
402
403out:
404 spin_unlock(&mfc_unres_lock);
405}
406
407/* Fill oifs list. It is called under write locked mrt_lock. */
408
d1b04c08 409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
1da177e4
LT
410{
411 int vifi;
4feb88e5 412 struct net *net = mfc_net(cache);
1da177e4
LT
413
414 cache->mfc_un.res.minvif = MAXVIFS;
415 cache->mfc_un.res.maxvif = 0;
416 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417
4feb88e5
BT
418 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419 if (VIF_EXISTS(net, vifi) &&
cf958ae3 420 ttls[vifi] && ttls[vifi] < 255) {
1da177e4
LT
421 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422 if (cache->mfc_un.res.minvif > vifi)
423 cache->mfc_un.res.minvif = vifi;
424 if (cache->mfc_un.res.maxvif <= vifi)
425 cache->mfc_un.res.maxvif = vifi + 1;
426 }
427 }
428}
429
4feb88e5 430static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
1da177e4
LT
431{
432 int vifi = vifc->vifc_vifi;
4feb88e5 433 struct vif_device *v = &net->ipv4.vif_table[vifi];
1da177e4
LT
434 struct net_device *dev;
435 struct in_device *in_dev;
d607032d 436 int err;
1da177e4
LT
437
438 /* Is vif busy ? */
4feb88e5 439 if (VIF_EXISTS(net, vifi))
1da177e4
LT
440 return -EADDRINUSE;
441
442 switch (vifc->vifc_flags) {
443#ifdef CONFIG_IP_PIMSM
444 case VIFF_REGISTER:
445 /*
446 * Special Purpose VIF in PIM
447 * All the packets will be sent to the daemon
448 */
4feb88e5 449 if (net->ipv4.mroute_reg_vif_num >= 0)
1da177e4
LT
450 return -EADDRINUSE;
451 dev = ipmr_reg_vif();
452 if (!dev)
453 return -ENOBUFS;
d607032d
WC
454 err = dev_set_allmulti(dev, 1);
455 if (err) {
456 unregister_netdevice(dev);
7dc00c82 457 dev_put(dev);
d607032d
WC
458 return err;
459 }
1da177e4
LT
460 break;
461#endif
e905a9ed 462 case VIFF_TUNNEL:
4feb88e5 463 dev = ipmr_new_tunnel(net, vifc);
1da177e4
LT
464 if (!dev)
465 return -ENOBUFS;
d607032d
WC
466 err = dev_set_allmulti(dev, 1);
467 if (err) {
468 ipmr_del_tunnel(dev, vifc);
7dc00c82 469 dev_put(dev);
d607032d
WC
470 return err;
471 }
1da177e4
LT
472 break;
473 case 0:
4feb88e5 474 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
1da177e4
LT
475 if (!dev)
476 return -EADDRNOTAVAIL;
d607032d 477 err = dev_set_allmulti(dev, 1);
7dc00c82
WC
478 if (err) {
479 dev_put(dev);
d607032d 480 return err;
7dc00c82 481 }
1da177e4
LT
482 break;
483 default:
484 return -EINVAL;
485 }
486
e5ed6399 487 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1da177e4 488 return -EADDRNOTAVAIL;
42f811b8 489 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
1da177e4
LT
490 ip_rt_multicast_event(in_dev);
491
492 /*
493 * Fill in the VIF structures
494 */
c354e124
JK
495 v->rate_limit = vifc->vifc_rate_limit;
496 v->local = vifc->vifc_lcl_addr.s_addr;
497 v->remote = vifc->vifc_rmt_addr.s_addr;
498 v->flags = vifc->vifc_flags;
1da177e4
LT
499 if (!mrtsock)
500 v->flags |= VIFF_STATIC;
c354e124 501 v->threshold = vifc->vifc_threshold;
1da177e4
LT
502 v->bytes_in = 0;
503 v->bytes_out = 0;
504 v->pkt_in = 0;
505 v->pkt_out = 0;
506 v->link = dev->ifindex;
507 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
508 v->link = dev->iflink;
509
510 /* And finish update writing critical data */
511 write_lock_bh(&mrt_lock);
c354e124 512 v->dev = dev;
1da177e4
LT
513#ifdef CONFIG_IP_PIMSM
514 if (v->flags&VIFF_REGISTER)
4feb88e5 515 net->ipv4.mroute_reg_vif_num = vifi;
1da177e4 516#endif
4feb88e5
BT
517 if (vifi+1 > net->ipv4.maxvif)
518 net->ipv4.maxvif = vifi+1;
1da177e4
LT
519 write_unlock_bh(&mrt_lock);
520 return 0;
521}
522
4feb88e5
BT
523static struct mfc_cache *ipmr_cache_find(struct net *net,
524 __be32 origin,
525 __be32 mcastgrp)
1da177e4 526{
c354e124 527 int line = MFC_HASH(mcastgrp, origin);
1da177e4
LT
528 struct mfc_cache *c;
529
4feb88e5 530 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
1da177e4
LT
531 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
532 break;
533 }
534 return c;
535}
536
537/*
538 * Allocate a multicast cache entry
539 */
5c0a66f5 540static struct mfc_cache *ipmr_cache_alloc(struct net *net)
1da177e4 541{
c354e124
JK
542 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
543 if (c == NULL)
1da177e4 544 return NULL;
1da177e4 545 c->mfc_un.res.minvif = MAXVIFS;
5c0a66f5 546 mfc_net_set(c, net);
1da177e4
LT
547 return c;
548}
549
5c0a66f5 550static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
1da177e4 551{
c354e124
JK
552 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
553 if (c == NULL)
1da177e4 554 return NULL;
1da177e4
LT
555 skb_queue_head_init(&c->mfc_un.unres.unresolved);
556 c->mfc_un.unres.expires = jiffies + 10*HZ;
5c0a66f5 557 mfc_net_set(c, net);
1da177e4
LT
558 return c;
559}
560
561/*
562 * A cache entry has gone into a resolved state from queued
563 */
e905a9ed 564
1da177e4
LT
565static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
566{
567 struct sk_buff *skb;
9ef1d4c7 568 struct nlmsgerr *e;
1da177e4
LT
569
570 /*
571 * Play the pending entries through our router
572 */
573
c354e124 574 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
eddc9ec5 575 if (ip_hdr(skb)->version == 0) {
1da177e4
LT
576 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
577
578 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
27a884dc
ACM
579 nlh->nlmsg_len = (skb_tail_pointer(skb) -
580 (u8 *)nlh);
1da177e4
LT
581 } else {
582 nlh->nlmsg_type = NLMSG_ERROR;
583 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
584 skb_trim(skb, nlh->nlmsg_len);
9ef1d4c7
PM
585 e = NLMSG_DATA(nlh);
586 e->error = -EMSGSIZE;
587 memset(&e->msg, 0, sizeof(e->msg));
1da177e4 588 }
2942e900 589
4feb88e5 590 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
1da177e4
LT
591 } else
592 ip_mr_forward(skb, c, 0);
593 }
594}
595
596/*
597 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
598 * expects the following bizarre scheme.
599 *
600 * Called under mrt_lock.
601 */
e905a9ed 602
4feb88e5
BT
603static int ipmr_cache_report(struct net *net,
604 struct sk_buff *pkt, vifi_t vifi, int assert)
1da177e4
LT
605{
606 struct sk_buff *skb;
c9bdd4b5 607 const int ihl = ip_hdrlen(pkt);
1da177e4
LT
608 struct igmphdr *igmp;
609 struct igmpmsg *msg;
610 int ret;
611
612#ifdef CONFIG_IP_PIMSM
613 if (assert == IGMPMSG_WHOLEPKT)
614 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
615 else
616#endif
617 skb = alloc_skb(128, GFP_ATOMIC);
618
132adf54 619 if (!skb)
1da177e4
LT
620 return -ENOBUFS;
621
622#ifdef CONFIG_IP_PIMSM
623 if (assert == IGMPMSG_WHOLEPKT) {
624 /* Ugly, but we have no choice with this interface.
625 Duplicate old header, fix ihl, length etc.
626 And all this only to mangle msg->im_msgtype and
627 to set msg->im_mbz to "mbz" :-)
628 */
878c8145
ACM
629 skb_push(skb, sizeof(struct iphdr));
630 skb_reset_network_header(skb);
badff6d0 631 skb_reset_transport_header(skb);
0272ffc4 632 msg = (struct igmpmsg *)skb_network_header(skb);
d56f90a7 633 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
1da177e4
LT
634 msg->im_msgtype = IGMPMSG_WHOLEPKT;
635 msg->im_mbz = 0;
4feb88e5 636 msg->im_vif = net->ipv4.mroute_reg_vif_num;
eddc9ec5
ACM
637 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
638 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
639 sizeof(struct iphdr));
e905a9ed 640 } else
1da177e4 641#endif
e905a9ed
YH
642 {
643
1da177e4
LT
644 /*
645 * Copy the IP header
646 */
647
27a884dc 648 skb->network_header = skb->tail;
ddc7b8e3 649 skb_put(skb, ihl);
27d7ff46 650 skb_copy_to_linear_data(skb, pkt->data, ihl);
eddc9ec5
ACM
651 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
652 msg = (struct igmpmsg *)skb_network_header(skb);
1da177e4 653 msg->im_vif = vifi;
adf30907 654 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1da177e4
LT
655
656 /*
657 * Add our header
658 */
659
c354e124 660 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
1da177e4
LT
661 igmp->type =
662 msg->im_msgtype = assert;
663 igmp->code = 0;
eddc9ec5 664 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
b0e380b1 665 skb->transport_header = skb->network_header;
e905a9ed 666 }
1da177e4 667
4feb88e5 668 if (net->ipv4.mroute_sk == NULL) {
1da177e4
LT
669 kfree_skb(skb);
670 return -EINVAL;
671 }
672
673 /*
674 * Deliver to mrouted
675 */
4feb88e5 676 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
70a269e6 677 if (ret < 0) {
1da177e4
LT
678 if (net_ratelimit())
679 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
680 kfree_skb(skb);
681 }
682
683 return ret;
684}
685
686/*
687 * Queue a packet for resolution. It gets locked cache entry!
688 */
e905a9ed 689
1da177e4 690static int
4feb88e5 691ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
1da177e4
LT
692{
693 int err;
694 struct mfc_cache *c;
eddc9ec5 695 const struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
696
697 spin_lock_bh(&mfc_unres_lock);
698 for (c=mfc_unres_queue; c; c=c->next) {
4feb88e5 699 if (net_eq(mfc_net(c), net) &&
1e8fb3b6 700 c->mfc_mcastgrp == iph->daddr &&
eddc9ec5 701 c->mfc_origin == iph->saddr)
1da177e4
LT
702 break;
703 }
704
705 if (c == NULL) {
706 /*
707 * Create a new entry if allowable
708 */
709
4feb88e5
BT
710 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
711 (c = ipmr_cache_alloc_unres(net)) == NULL) {
1da177e4
LT
712 spin_unlock_bh(&mfc_unres_lock);
713
714 kfree_skb(skb);
715 return -ENOBUFS;
716 }
717
718 /*
719 * Fill in the new cache entry
720 */
eddc9ec5
ACM
721 c->mfc_parent = -1;
722 c->mfc_origin = iph->saddr;
723 c->mfc_mcastgrp = iph->daddr;
1da177e4
LT
724
725 /*
726 * Reflect first query at mrouted.
727 */
4feb88e5
BT
728 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
729 if (err < 0) {
e905a9ed 730 /* If the report failed throw the cache entry
1da177e4
LT
731 out - Brad Parker
732 */
733 spin_unlock_bh(&mfc_unres_lock);
734
5c0a66f5 735 ipmr_cache_free(c);
1da177e4
LT
736 kfree_skb(skb);
737 return err;
738 }
739
4feb88e5 740 atomic_inc(&net->ipv4.cache_resolve_queue_len);
1da177e4
LT
741 c->next = mfc_unres_queue;
742 mfc_unres_queue = c;
743
744 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
745 }
746
747 /*
748 * See if we can append the packet
749 */
750 if (c->mfc_un.unres.unresolved.qlen>3) {
751 kfree_skb(skb);
752 err = -ENOBUFS;
753 } else {
c354e124 754 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1da177e4
LT
755 err = 0;
756 }
757
758 spin_unlock_bh(&mfc_unres_lock);
759 return err;
760}
761
762/*
763 * MFC cache manipulation by user space mroute daemon
764 */
765
4feb88e5 766static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
1da177e4
LT
767{
768 int line;
769 struct mfc_cache *c, **cp;
770
c354e124 771 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 772
4feb88e5 773 for (cp = &net->ipv4.mfc_cache_array[line];
2bb8b26c 774 (c = *cp) != NULL; cp = &c->next) {
1da177e4
LT
775 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
776 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
777 write_lock_bh(&mrt_lock);
778 *cp = c->next;
779 write_unlock_bh(&mrt_lock);
780
5c0a66f5 781 ipmr_cache_free(c);
1da177e4
LT
782 return 0;
783 }
784 }
785 return -ENOENT;
786}
787
4feb88e5 788static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
1da177e4
LT
789{
790 int line;
791 struct mfc_cache *uc, *c, **cp;
792
c354e124 793 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 794
4feb88e5 795 for (cp = &net->ipv4.mfc_cache_array[line];
2bb8b26c 796 (c = *cp) != NULL; cp = &c->next) {
1da177e4
LT
797 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
798 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
799 break;
800 }
801
802 if (c != NULL) {
803 write_lock_bh(&mrt_lock);
804 c->mfc_parent = mfc->mfcc_parent;
d1b04c08 805 ipmr_update_thresholds(c, mfc->mfcc_ttls);
1da177e4
LT
806 if (!mrtsock)
807 c->mfc_flags |= MFC_STATIC;
808 write_unlock_bh(&mrt_lock);
809 return 0;
810 }
811
f97c1e0c 812 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1da177e4
LT
813 return -EINVAL;
814
4feb88e5 815 c = ipmr_cache_alloc(net);
c354e124 816 if (c == NULL)
1da177e4
LT
817 return -ENOMEM;
818
c354e124
JK
819 c->mfc_origin = mfc->mfcc_origin.s_addr;
820 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
821 c->mfc_parent = mfc->mfcc_parent;
d1b04c08 822 ipmr_update_thresholds(c, mfc->mfcc_ttls);
1da177e4
LT
823 if (!mrtsock)
824 c->mfc_flags |= MFC_STATIC;
825
826 write_lock_bh(&mrt_lock);
4feb88e5
BT
827 c->next = net->ipv4.mfc_cache_array[line];
828 net->ipv4.mfc_cache_array[line] = c;
1da177e4
LT
829 write_unlock_bh(&mrt_lock);
830
831 /*
832 * Check to see if we resolved a queued list. If so we
833 * need to send on the frames and tidy up.
834 */
835 spin_lock_bh(&mfc_unres_lock);
836 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
837 cp = &uc->next) {
4feb88e5 838 if (net_eq(mfc_net(uc), net) &&
1e8fb3b6 839 uc->mfc_origin == c->mfc_origin &&
1da177e4
LT
840 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
841 *cp = uc->next;
4feb88e5 842 atomic_dec(&net->ipv4.cache_resolve_queue_len);
1da177e4
LT
843 break;
844 }
845 }
1e8fb3b6
BT
846 if (mfc_unres_queue == NULL)
847 del_timer(&ipmr_expire_timer);
1da177e4
LT
848 spin_unlock_bh(&mfc_unres_lock);
849
850 if (uc) {
851 ipmr_cache_resolve(uc, c);
5c0a66f5 852 ipmr_cache_free(uc);
1da177e4
LT
853 }
854 return 0;
855}
856
857/*
858 * Close the multicast socket, and clear the vif tables etc
859 */
e905a9ed 860
4feb88e5 861static void mroute_clean_tables(struct net *net)
1da177e4
LT
862{
863 int i;
e905a9ed 864
1da177e4
LT
865 /*
866 * Shut down all active vif entries
867 */
4feb88e5
BT
868 for (i = 0; i < net->ipv4.maxvif; i++) {
869 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
870 vif_delete(net, i, 0);
1da177e4
LT
871 }
872
873 /*
874 * Wipe the cache
875 */
c354e124 876 for (i=0; i<MFC_LINES; i++) {
1da177e4
LT
877 struct mfc_cache *c, **cp;
878
4feb88e5 879 cp = &net->ipv4.mfc_cache_array[i];
1da177e4
LT
880 while ((c = *cp) != NULL) {
881 if (c->mfc_flags&MFC_STATIC) {
882 cp = &c->next;
883 continue;
884 }
885 write_lock_bh(&mrt_lock);
886 *cp = c->next;
887 write_unlock_bh(&mrt_lock);
888
5c0a66f5 889 ipmr_cache_free(c);
1da177e4
LT
890 }
891 }
892
4feb88e5 893 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
1e8fb3b6 894 struct mfc_cache *c, **cp;
1da177e4
LT
895
896 spin_lock_bh(&mfc_unres_lock);
1e8fb3b6
BT
897 cp = &mfc_unres_queue;
898 while ((c = *cp) != NULL) {
4feb88e5 899 if (!net_eq(mfc_net(c), net)) {
1e8fb3b6
BT
900 cp = &c->next;
901 continue;
902 }
903 *cp = c->next;
1da177e4
LT
904
905 ipmr_destroy_unres(c);
1da177e4
LT
906 }
907 spin_unlock_bh(&mfc_unres_lock);
908 }
909}
910
911static void mrtsock_destruct(struct sock *sk)
912{
4feb88e5
BT
913 struct net *net = sock_net(sk);
914
1da177e4 915 rtnl_lock();
4feb88e5
BT
916 if (sk == net->ipv4.mroute_sk) {
917 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1da177e4
LT
918
919 write_lock_bh(&mrt_lock);
4feb88e5 920 net->ipv4.mroute_sk = NULL;
1da177e4
LT
921 write_unlock_bh(&mrt_lock);
922
4feb88e5 923 mroute_clean_tables(net);
1da177e4
LT
924 }
925 rtnl_unlock();
926}
927
928/*
929 * Socket options and virtual interface manipulation. The whole
930 * virtual interface system is a complete heap, but unfortunately
931 * that's how BSD mrouted happens to think. Maybe one day with a proper
932 * MOSPF/PIM router set up we can clean this up.
933 */
e905a9ed 934
c354e124 935int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1da177e4
LT
936{
937 int ret;
938 struct vifctl vif;
939 struct mfcctl mfc;
4feb88e5 940 struct net *net = sock_net(sk);
e905a9ed 941
132adf54 942 if (optname != MRT_INIT) {
4feb88e5 943 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
1da177e4
LT
944 return -EACCES;
945 }
946
132adf54
SH
947 switch (optname) {
948 case MRT_INIT:
949 if (sk->sk_type != SOCK_RAW ||
950 inet_sk(sk)->num != IPPROTO_IGMP)
951 return -EOPNOTSUPP;
c354e124 952 if (optlen != sizeof(int))
132adf54 953 return -ENOPROTOOPT;
1da177e4 954
132adf54 955 rtnl_lock();
4feb88e5 956 if (net->ipv4.mroute_sk) {
1da177e4 957 rtnl_unlock();
132adf54
SH
958 return -EADDRINUSE;
959 }
960
961 ret = ip_ra_control(sk, 1, mrtsock_destruct);
962 if (ret == 0) {
963 write_lock_bh(&mrt_lock);
4feb88e5 964 net->ipv4.mroute_sk = sk;
132adf54
SH
965 write_unlock_bh(&mrt_lock);
966
4feb88e5 967 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
132adf54
SH
968 }
969 rtnl_unlock();
970 return ret;
971 case MRT_DONE:
4feb88e5 972 if (sk != net->ipv4.mroute_sk)
132adf54
SH
973 return -EACCES;
974 return ip_ra_control(sk, 0, NULL);
975 case MRT_ADD_VIF:
976 case MRT_DEL_VIF:
c354e124 977 if (optlen != sizeof(vif))
132adf54 978 return -EINVAL;
c354e124 979 if (copy_from_user(&vif, optval, sizeof(vif)))
132adf54
SH
980 return -EFAULT;
981 if (vif.vifc_vifi >= MAXVIFS)
982 return -ENFILE;
983 rtnl_lock();
c354e124 984 if (optname == MRT_ADD_VIF) {
4feb88e5 985 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
132adf54 986 } else {
4feb88e5 987 ret = vif_delete(net, vif.vifc_vifi, 0);
132adf54
SH
988 }
989 rtnl_unlock();
990 return ret;
1da177e4
LT
991
992 /*
993 * Manipulate the forwarding caches. These live
994 * in a sort of kernel/user symbiosis.
995 */
132adf54
SH
996 case MRT_ADD_MFC:
997 case MRT_DEL_MFC:
c354e124 998 if (optlen != sizeof(mfc))
132adf54 999 return -EINVAL;
c354e124 1000 if (copy_from_user(&mfc, optval, sizeof(mfc)))
132adf54
SH
1001 return -EFAULT;
1002 rtnl_lock();
c354e124 1003 if (optname == MRT_DEL_MFC)
4feb88e5 1004 ret = ipmr_mfc_delete(net, &mfc);
132adf54 1005 else
4feb88e5 1006 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
132adf54
SH
1007 rtnl_unlock();
1008 return ret;
1da177e4
LT
1009 /*
1010 * Control PIM assert.
1011 */
132adf54
SH
1012 case MRT_ASSERT:
1013 {
1014 int v;
1015 if (get_user(v,(int __user *)optval))
1016 return -EFAULT;
4feb88e5 1017 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
132adf54
SH
1018 return 0;
1019 }
1da177e4 1020#ifdef CONFIG_IP_PIMSM
132adf54
SH
1021 case MRT_PIM:
1022 {
ba93ef74
SH
1023 int v;
1024
132adf54
SH
1025 if (get_user(v,(int __user *)optval))
1026 return -EFAULT;
ba93ef74
SH
1027 v = (v) ? 1 : 0;
1028
132adf54
SH
1029 rtnl_lock();
1030 ret = 0;
4feb88e5
BT
1031 if (v != net->ipv4.mroute_do_pim) {
1032 net->ipv4.mroute_do_pim = v;
1033 net->ipv4.mroute_do_assert = v;
1da177e4 1034#ifdef CONFIG_IP_PIMSM_V2
4feb88e5 1035 if (net->ipv4.mroute_do_pim)
132adf54
SH
1036 ret = inet_add_protocol(&pim_protocol,
1037 IPPROTO_PIM);
1038 else
1039 ret = inet_del_protocol(&pim_protocol,
1040 IPPROTO_PIM);
1041 if (ret < 0)
1042 ret = -EAGAIN;
1da177e4 1043#endif
1da177e4 1044 }
132adf54
SH
1045 rtnl_unlock();
1046 return ret;
1047 }
1da177e4 1048#endif
132adf54
SH
1049 /*
1050 * Spurious command, or MRT_VERSION which you cannot
1051 * set.
1052 */
1053 default:
1054 return -ENOPROTOOPT;
1da177e4
LT
1055 }
1056}
1057
1058/*
1059 * Getsock opt support for the multicast routing system.
1060 */
e905a9ed 1061
c354e124 1062int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1da177e4
LT
1063{
1064 int olr;
1065 int val;
4feb88e5 1066 struct net *net = sock_net(sk);
1da177e4 1067
c354e124 1068 if (optname != MRT_VERSION &&
1da177e4
LT
1069#ifdef CONFIG_IP_PIMSM
1070 optname!=MRT_PIM &&
1071#endif
1072 optname!=MRT_ASSERT)
1073 return -ENOPROTOOPT;
1074
1075 if (get_user(olr, optlen))
1076 return -EFAULT;
1077
1078 olr = min_t(unsigned int, olr, sizeof(int));
1079 if (olr < 0)
1080 return -EINVAL;
e905a9ed 1081
c354e124 1082 if (put_user(olr, optlen))
1da177e4 1083 return -EFAULT;
c354e124
JK
1084 if (optname == MRT_VERSION)
1085 val = 0x0305;
1da177e4 1086#ifdef CONFIG_IP_PIMSM
c354e124 1087 else if (optname == MRT_PIM)
4feb88e5 1088 val = net->ipv4.mroute_do_pim;
1da177e4
LT
1089#endif
1090 else
4feb88e5 1091 val = net->ipv4.mroute_do_assert;
c354e124 1092 if (copy_to_user(optval, &val, olr))
1da177e4
LT
1093 return -EFAULT;
1094 return 0;
1095}
1096
1097/*
1098 * The IP multicast ioctl support routines.
1099 */
e905a9ed 1100
1da177e4
LT
1101int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1102{
1103 struct sioc_sg_req sr;
1104 struct sioc_vif_req vr;
1105 struct vif_device *vif;
1106 struct mfc_cache *c;
4feb88e5 1107 struct net *net = sock_net(sk);
e905a9ed 1108
132adf54
SH
1109 switch (cmd) {
1110 case SIOCGETVIFCNT:
c354e124 1111 if (copy_from_user(&vr, arg, sizeof(vr)))
132adf54 1112 return -EFAULT;
4feb88e5 1113 if (vr.vifi >= net->ipv4.maxvif)
132adf54
SH
1114 return -EINVAL;
1115 read_lock(&mrt_lock);
4feb88e5
BT
1116 vif = &net->ipv4.vif_table[vr.vifi];
1117 if (VIF_EXISTS(net, vr.vifi)) {
c354e124
JK
1118 vr.icount = vif->pkt_in;
1119 vr.ocount = vif->pkt_out;
1120 vr.ibytes = vif->bytes_in;
1121 vr.obytes = vif->bytes_out;
1da177e4 1122 read_unlock(&mrt_lock);
1da177e4 1123
c354e124 1124 if (copy_to_user(arg, &vr, sizeof(vr)))
132adf54
SH
1125 return -EFAULT;
1126 return 0;
1127 }
1128 read_unlock(&mrt_lock);
1129 return -EADDRNOTAVAIL;
1130 case SIOCGETSGCNT:
c354e124 1131 if (copy_from_user(&sr, arg, sizeof(sr)))
132adf54
SH
1132 return -EFAULT;
1133
1134 read_lock(&mrt_lock);
4feb88e5 1135 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
132adf54
SH
1136 if (c) {
1137 sr.pktcnt = c->mfc_un.res.pkt;
1138 sr.bytecnt = c->mfc_un.res.bytes;
1139 sr.wrong_if = c->mfc_un.res.wrong_if;
1da177e4 1140 read_unlock(&mrt_lock);
132adf54 1141
c354e124 1142 if (copy_to_user(arg, &sr, sizeof(sr)))
132adf54
SH
1143 return -EFAULT;
1144 return 0;
1145 }
1146 read_unlock(&mrt_lock);
1147 return -EADDRNOTAVAIL;
1148 default:
1149 return -ENOIOCTLCMD;
1da177e4
LT
1150 }
1151}
1152
1153
1154static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1155{
e9dc8653 1156 struct net_device *dev = ptr;
4feb88e5 1157 struct net *net = dev_net(dev);
1da177e4
LT
1158 struct vif_device *v;
1159 int ct;
e9dc8653 1160
4feb88e5 1161 if (!net_eq(dev_net(dev), net))
e9dc8653
EB
1162 return NOTIFY_DONE;
1163
1da177e4
LT
1164 if (event != NETDEV_UNREGISTER)
1165 return NOTIFY_DONE;
4feb88e5
BT
1166 v = &net->ipv4.vif_table[0];
1167 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
c354e124 1168 if (v->dev == dev)
4feb88e5 1169 vif_delete(net, ct, 1);
1da177e4
LT
1170 }
1171 return NOTIFY_DONE;
1172}
1173
1174
c354e124 1175static struct notifier_block ip_mr_notifier = {
1da177e4
LT
1176 .notifier_call = ipmr_device_event,
1177};
1178
1179/*
1180 * Encapsulate a packet by attaching a valid IPIP header to it.
1181 * This avoids tunnel drivers and other mess and gives us the speed so
1182 * important for multicast video.
1183 */
e905a9ed 1184
114c7844 1185static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1da177e4 1186{
8856dfa3 1187 struct iphdr *iph;
eddc9ec5 1188 struct iphdr *old_iph = ip_hdr(skb);
8856dfa3
ACM
1189
1190 skb_push(skb, sizeof(struct iphdr));
b0e380b1 1191 skb->transport_header = skb->network_header;
8856dfa3 1192 skb_reset_network_header(skb);
eddc9ec5 1193 iph = ip_hdr(skb);
1da177e4
LT
1194
1195 iph->version = 4;
e023dd64
ACM
1196 iph->tos = old_iph->tos;
1197 iph->ttl = old_iph->ttl;
1da177e4
LT
1198 iph->frag_off = 0;
1199 iph->daddr = daddr;
1200 iph->saddr = saddr;
1201 iph->protocol = IPPROTO_IPIP;
1202 iph->ihl = 5;
1203 iph->tot_len = htons(skb->len);
adf30907 1204 ip_select_ident(iph, skb_dst(skb), NULL);
1da177e4
LT
1205 ip_send_check(iph);
1206
1da177e4
LT
1207 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1208 nf_reset(skb);
1209}
1210
1211static inline int ipmr_forward_finish(struct sk_buff *skb)
1212{
1213 struct ip_options * opt = &(IPCB(skb)->opt);
1214
adf30907 1215 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1da177e4
LT
1216
1217 if (unlikely(opt->optlen))
1218 ip_forward_options(skb);
1219
1220 return dst_output(skb);
1221}
1222
1223/*
1224 * Processing handlers for ipmr_forward
1225 */
1226
/*
 *	Transmit one copy of a multicast packet on VIF @vifi.
 *	Consumes @skb in all cases (transmitted via NF_HOOK or freed on
 *	any error path).  Callers clone the skb first if they need to
 *	send on multiple VIFs.
 */
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct net *net = mfc_net(c);
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	/* Register VIF: do not transmit, hand the whole packet to the
	 * user-space PIM daemon instead. */
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	/* Route the copy: for tunnel VIFs route towards the tunnel
	 * endpoint and reserve room for the IPIP header, otherwise route
	 * towards the packet's own destination. */
	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	/* Make sure we own the data and have headroom for link layer
	 * plus (optionally) the IPIP encapsulation header. */
	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* Re-point the skb at the new route; the rt reference is
	 * transferred to the skb here. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}
1327
1328static int ipmr_find_vif(struct net_device *dev)
1329{
4feb88e5 1330 struct net *net = dev_net(dev);
1da177e4 1331 int ct;
4feb88e5
BT
1332 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1333 if (net->ipv4.vif_table[ct].dev == dev)
1da177e4
LT
1334 break;
1335 }
1336 return ct;
1337}
1338
1339/* "local" means that we should preserve one skb (for local delivery) */
1340
/*
 *	Forward one multicast packet along all VIFs whose TTL threshold
 *	it passes.  If @local is set, the caller still needs the skb for
 *	local delivery, so only clones are transmitted; otherwise the
 *	skb itself is consumed (transmitted or freed).  Always returns 0.
 */
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			/* Rate-limited WRONGVIF upcall to the routing daemon */
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	/* psend defers each transmit by one iteration so that the last
	 * eligible VIF can receive the original skb instead of a clone. */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			/* caller keeps skb for local delivery: send a clone */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
1423
1424
1425/*
1426 * Multicast packets for forwarding arrive here
1427 */
1428
/*
 *	Entry point for multicast packets needing forwarding.  Looks up
 *	the (S,G) forwarding cache, queues unresolved packets for the
 *	routing daemon, forwards resolved ones and performs local
 *	delivery when the route says so.
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (net->ipv4.mroute_sk) {
				/* hand IGMP straight to the mroute daemon's
				 * raw socket; skb is consumed by raw_rcv() */
				nf_reset(skb);
				raw_rcv(net->ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 * No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			/* deliver the original locally, keep a clone for
			 * the unresolved queue */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			/* park the skb until the daemon resolves (S,G) */
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
1509
b1879204
IJ
1510#ifdef CONFIG_IP_PIMSM
/*
 *	Common PIM register decapsulation for PIMv1 and PIMv2.
 *	@pimlen is the size of the PIM header preceding the encapsulated
 *	IP packet.  Returns 0 when the skb has been consumed (re-injected
 *	on the register VIF), 1 when the caller must free it.
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destinted to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	/* Take a reference on the register device under mrt_lock so it
	 * cannot go away while we re-inject the inner packet. */
	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	/* Strip the outer IP + PIM headers and re-inject the inner
	 * packet as if it had arrived on the register VIF. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
1555#endif
1556
1557#ifdef CONFIG_IP_PIMSM_V1
1558/*
1559 * Handle IGMP messages of PIMv1
1560 */
1561
1562int pim_rcv_v1(struct sk_buff * skb)
1563{
1564 struct igmphdr *pim;
4feb88e5 1565 struct net *net = dev_net(skb->dev);
b1879204
IJ
1566
1567 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1568 goto drop;
1569
1570 pim = igmp_hdr(skb);
1571
4feb88e5 1572 if (!net->ipv4.mroute_do_pim ||
b1879204
IJ
1573 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1574 goto drop;
1575
1576 if (__pim_rcv(skb, sizeof(*pim))) {
1577drop:
1578 kfree_skb(skb);
1579 }
1da177e4
LT
1580 return 0;
1581}
1582#endif
1583
1584#ifdef CONFIG_IP_PIMSM_V2
1585static int pim_rcv(struct sk_buff * skb)
1586{
1587 struct pimreghdr *pim;
1da177e4 1588
b1879204 1589 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1da177e4
LT
1590 goto drop;
1591
9c70220b 1592 pim = (struct pimreghdr *)skb_transport_header(skb);
e905a9ed 1593 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1da177e4 1594 (pim->flags&PIM_NULL_REGISTER) ||
e905a9ed 1595 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
d3bc23e7 1596 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1da177e4
LT
1597 goto drop;
1598
b1879204
IJ
1599 if (__pim_rcv(skb, sizeof(*pim))) {
1600drop:
1601 kfree_skb(skb);
1602 }
1da177e4
LT
1603 return 0;
1604}
1605#endif
1606
/*
 *	Fill an RTM route reply with the multicast forwarding state of
 *	cache entry @c: the input interface (RTA_IIF) and one RTA_MULTIPATH
 *	nexthop per output VIF.  Returns 1 on success, -EMSGSIZE when the
 *	skb runs out of tailroom (partially written attributes are trimmed).
 */
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
	/* remember the tail so we can roll back on overflow */
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	/* reserve the RTA_MULTIPATH header; length is patched below once
	 * all nexthops have been appended */
	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		/* ttl 255 marks a VIF that is not part of this route */
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
1642
4feb88e5
BT
/*
 *	RTM_GETROUTE support for multicast routes.  Looks up the (S,G)
 *	entry for the skb's route and fills @rtm via ipmr_fill_mroute().
 *	When the entry is unresolved and @nowait is clear, a dummy packet
 *	is queued to trigger resolution by the routing daemon.
 */
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		/* Fake a minimal IP header on the clone so the unresolved
		 * queue machinery can extract src/dst; version 0 marks it
		 * as synthetic. */
		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1693
e905a9ed 1694#ifdef CONFIG_PROC_FS
1da177e4
LT
1695/*
1696 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1697 */
/* Iterator state for /proc/net/ip_mr_vif: per-net seq data plus the
 * current index into the vif table. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;		/* current vif index */
};
1702
f6bb4514
BT
1703static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1704 struct ipmr_vif_iter *iter,
1da177e4
LT
1705 loff_t pos)
1706{
f6bb4514
BT
1707 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1708 if (!VIF_EXISTS(net, iter->ct))
1da177e4 1709 continue;
e905a9ed 1710 if (pos-- == 0)
f6bb4514 1711 return &net->ipv4.vif_table[iter->ct];
1da177e4
LT
1712 }
1713 return NULL;
1714}
1715
1716static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
ba93ef74 1717 __acquires(mrt_lock)
1da177e4 1718{
f6bb4514
BT
1719 struct net *net = seq_file_net(seq);
1720
1da177e4 1721 read_lock(&mrt_lock);
f6bb4514 1722 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
1723 : SEQ_START_TOKEN;
1724}
1725
1726static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1727{
1728 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 1729 struct net *net = seq_file_net(seq);
1da177e4
LT
1730
1731 ++*pos;
1732 if (v == SEQ_START_TOKEN)
f6bb4514 1733 return ipmr_vif_seq_idx(net, iter, 0);
e905a9ed 1734
f6bb4514
BT
1735 while (++iter->ct < net->ipv4.maxvif) {
1736 if (!VIF_EXISTS(net, iter->ct))
1da177e4 1737 continue;
f6bb4514 1738 return &net->ipv4.vif_table[iter->ct];
1da177e4
LT
1739 }
1740 return NULL;
1741}
1742
/* seq_file stop: release mrt_lock taken in ipmr_vif_seq_start(). */
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
1748
/* seq_file show: one header line, then one formatted line per VIF.
 * NOTE(review): header/format whitespace reconstructed from the upstream
 * file; the blame extraction collapsed runs of spaces. */
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}
1769
f690808e 1770static const struct seq_operations ipmr_vif_seq_ops = {
1da177e4
LT
1771 .start = ipmr_vif_seq_start,
1772 .next = ipmr_vif_seq_next,
1773 .stop = ipmr_vif_seq_stop,
1774 .show = ipmr_vif_seq_show,
1775};
1776
/* open() for /proc/net/ip_mr_vif: per-net seq_file with vif iterator. */
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}
1782
9a32144e 1783static const struct file_operations ipmr_vif_fops = {
1da177e4
LT
1784 .owner = THIS_MODULE,
1785 .open = ipmr_vif_open,
1786 .read = seq_read,
1787 .llseek = seq_lseek,
f6bb4514 1788 .release = seq_release_net,
1da177e4
LT
1789};
1790
/* Iterator state for /proc/net/ip_mr_cache: tracks which list we are
 * walking (resolved hash array or unresolved queue) and the hash bucket. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;	/* mfc_cache_array or &mfc_unres_queue */
	int ct;				/* current hash bucket */
};
1796
1797
f6bb4514
BT
/*
 *	Return the @pos'th cache entry: first the resolved hash array,
 *	then this net's entries on the unresolved queue.
 *	Locking: returns with mrt_lock held (resolved) or mfc_unres_lock
 *	held (unresolved); ipmr_mfc_seq_stop() releases whichever one
 *	it->cache indicates.
 */
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;	/* mrt_lock stays held */
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	/* the unresolved queue is global: filter by owning net */
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;	/* mfc_unres_lock stays held */
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
1823
1824
/* seq_file start: reset the iterator and position it; *pos == 0 yields
 * the header token.  Any lock taken by ipmr_mfc_seq_idx() is released
 * in ipmr_mfc_seq_stop(). */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
1835
/*
 *	seq_file next: walk the current hash chain, then later buckets,
 *	then hand over from the resolved array (mrt_lock) to the
 *	unresolved queue (mfc_unres_lock).  Lock ownership follows
 *	it->cache, as in ipmr_mfc_seq_idx().
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	/* skip entries owned by other network namespaces */
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
1879
/* seq_file stop: release whichever lock the iterator still holds,
 * as recorded in it->cache (see ipmr_mfc_seq_idx()). */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}
1890
/* seq_file show: one header line, then one line per cache entry with
 * counters and the per-output-VIF TTL thresholds.
 * NOTE(review): header whitespace reconstructed from the upstream file;
 * the blame extraction collapsed runs of spaces. */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* ttl 255 marks a VIF not used by this route */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++ ) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
1931
f690808e 1932static const struct seq_operations ipmr_mfc_seq_ops = {
1da177e4
LT
1933 .start = ipmr_mfc_seq_start,
1934 .next = ipmr_mfc_seq_next,
1935 .stop = ipmr_mfc_seq_stop,
1936 .show = ipmr_mfc_seq_show,
1937};
1938
/* open() for /proc/net/ip_mr_cache: per-net seq_file with mfc iterator. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}
1944
9a32144e 1945static const struct file_operations ipmr_mfc_fops = {
1da177e4
LT
1946 .owner = THIS_MODULE,
1947 .open = ipmr_mfc_open,
1948 .read = seq_read,
1949 .llseek = seq_lseek,
f6bb4514 1950 .release = seq_release_net,
1da177e4 1951};
e905a9ed 1952#endif
1da177e4
LT
1953
1954#ifdef CONFIG_IP_PIMSM_V2
/* Registered for IPPROTO_PIM when user space enables MRT_PIM. */
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
1958#endif
1959
1960
1961/*
1962 * Setup for IP multicast routing
1963 */
cf958ae3
BT
/*
 *	Per-net init: allocate the VIF table and forwarding cache and
 *	create the /proc entries.  On failure everything already set up
 *	is unwound in reverse order; returns 0 or -ENOMEM.
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	/* -1 means "no register VIF configured yet" */
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}
2008
/* Per-net teardown: remove /proc entries first, then free the tables
 * (reverse of ipmr_net_init()). */
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}
2018
/* Per-network-namespace setup/teardown hooks for IP multicast routing. */
static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};
e905a9ed 2023
03d2f897 2024int __init ip_mr_init(void)
1da177e4 2025{
03d2f897
WC
2026 int err;
2027
1da177e4
LT
2028 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2029 sizeof(struct mfc_cache),
e5d679f3 2030 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
20c2df83 2031 NULL);
03d2f897
WC
2032 if (!mrt_cachep)
2033 return -ENOMEM;
2034
cf958ae3
BT
2035 err = register_pernet_subsys(&ipmr_net_ops);
2036 if (err)
2037 goto reg_pernet_fail;
2038
b24b8a24 2039 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
03d2f897
WC
2040 err = register_netdevice_notifier(&ip_mr_notifier);
2041 if (err)
2042 goto reg_notif_fail;
03d2f897 2043 return 0;
f6bb4514 2044
c3e38896
BT
2045reg_notif_fail:
2046 del_timer(&ipmr_expire_timer);
cf958ae3
BT
2047 unregister_pernet_subsys(&ipmr_net_ops);
2048reg_pernet_fail:
c3e38896 2049 kmem_cache_destroy(mrt_cachep);
03d2f897 2050 return err;
1da177e4 2051}