/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *				:	overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

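/* In sketch form, the discipline this buys us (all of the patterns below
 * appear later in this file): configuration writers nest
 *
 *	rtnl_lock();
 *	write_lock_bh(&mrt_lock);
 *	... mutate vif_table / mfc_cache_array ...
 *	write_unlock_bh(&mrt_lock);
 *	rtnl_unlock();
 *
 * while the forwarding fast path takes only read_lock(&mrt_lock), and the
 * unresolved queue alone is touched under spin_lock_bh(&mfc_unres_lock).
 */
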
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		kfree(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0)
		return err;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

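/* What the daemon sees, in sketch form: each upcall arrives on the mroute
 * socket as a pseudo IGMP packet whose IP header is overlaid by struct
 * igmpmsg (from <linux/mroute.h>), so a userspace reader can do roughly
 *
 *	struct igmpmsg im;
 *	recv(mroute_sk, &im, sizeof(im), 0);
 *	if (im.im_mbz == 0)	(always zero for upcalls, unlike real IGMP)
 *		handle(im.im_msgtype, im.im_vif, im.im_src, im.im_dst);
 *
 * with im_msgtype one of IGMPMSG_NOCACHE, IGMPMSG_WRONGVIF or
 * IGMPMSG_WHOLEPKT.
 */
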
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

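/* For reference, a sketch of the userspace side (field names from struct
 * mfcctl in <linux/mroute.h>; src, grp, iif_vif and oif_vif are assumed
 * variables): the daemon answers an IGMPMSG_NOCACHE upcall with roughly
 *
 *	struct mfcctl mc = {0};
 *	mc.mfcc_origin   = src;			source the upcall reported
 *	mc.mfcc_mcastgrp = grp;			group the upcall reported
 *	mc.mfcc_parent   = iif_vif;		expected input vif
 *	mc.mfcc_ttls[oif_vif] = 1;		forward on this vif
 *	setsockopt(mroute_sk, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 * which lands in ipmr_mfc_add() below and, via ipmr_cache_resolve(),
 * releases any packets queued while the entry was unresolved.
 */
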
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

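/* A sketch of the expected call sequence from the daemon's side (types from
 * <linux/mroute.h>; the socket must be a raw IGMP socket, as enforced in
 * the MRT_INIT case below; local_if_addr is an assumed variable):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = {0};
 *	vc.vifc_vifi = 0;
 *	vc.vifc_threshold = 1;
 *	vc.vifc_lcl_addr.s_addr = local_if_addr;
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 * MRT_DONE (or simply closing the socket) tears everything down via
 * mrtsock_destruct() above.
 */
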
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

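/* For example (a sketch; struct sioc_vif_req comes from <linux/mroute.h>),
 * per-vif counters can be polled without any netlink machinery:
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *	if (ioctl(mroute_sk, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu pkts in, %lu pkts out\n",
 *		       vr.icount, vr.ocount);
 *
 * SIOCGETSGCNT does the same per (source, group) pair via struct
 * sioc_sg_req.
 */
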
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

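/* The resulting wire format, sketched (no options are ever added to the
 * outer header, hence the fixed ihl of 5 below):
 *
 *	+----------------+-----------------+------------------+
 *	| outer iphdr    | original iphdr  | original payload |
 *	| proto = IPIP   | (untouched)     |                  |
 *	+----------------+-----------------+------------------+
 */
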
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version = 4;
	iph->tos = old_iph->tos;
	iph->ttl = old_iph->ttl;
	iph->frag_off = 0;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = IPPROTO_IPIP;
	iph->ihl = 5;
	iph->tot_len = htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so such packets silently
		   disappear into a blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces: if the mrouter runs a multicast
	 * application, that application should receive packets regardless
	 * of which interface it is joined on. Otherwise the program would
	 * have to join on all interfaces. A multihomed host (or router, but
	 * not mrouter), on the other hand, cannot join on more than one
	 * interface - it would result in receiving multiple copies of each
	 * packet.
	 */
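	/* Concretely (sketch): the clone destined for the local stack is
	 * made earlier, by ip_mr_input()/ip_mr_forward() below via their
	 * "local" handling; by the time we get here, skb is purely the
	 * forwarded copy.
	 */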
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to run multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0)
		return err;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option in IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
f97c1e0c 1817 if (!ipv4_is_multicast(encap->daddr) ||
1da177e4 1818 encap->tot_len == 0 ||
b1879204
IJ
1819 ntohs(encap->tot_len) + pimlen > skb->len)
1820 return 1;
1da177e4
LT
1821
1822 read_lock(&mrt_lock);
0c12295a
PM
1823 if (mrt->mroute_reg_vif_num >= 0)
1824 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1da177e4
LT
1825 if (reg_dev)
1826 dev_hold(reg_dev);
1827 read_unlock(&mrt_lock);
1828
e905a9ed 1829 if (reg_dev == NULL)
b1879204 1830 return 1;
1da177e4 1831
b0e380b1 1832 skb->mac_header = skb->network_header;
1da177e4 1833 skb_pull(skb, (u8*)encap - skb->data);
31c7711b 1834 skb_reset_network_header(skb);
1da177e4
LT
1835 skb->protocol = htons(ETH_P_IP);
1836 skb->ip_summed = 0;
1837 skb->pkt_type = PACKET_HOST;
d19d56dd
ED
1838
1839 skb_tunnel_rx(skb, reg_dev);
1840
1da177e4
LT
1841 netif_rx(skb);
1842 dev_put(reg_dev);
b1879204 1843
1da177e4 1844 return 0;
b1879204
IJ
1845}
1846#endif
1847
1848#ifdef CONFIG_IP_PIMSM_V1
1849/*
1850 * Handle IGMP messages of PIMv1
1851 */
1852
1853int pim_rcv_v1(struct sk_buff * skb)
1854{
1855 struct igmphdr *pim;
4feb88e5 1856 struct net *net = dev_net(skb->dev);
f0ad0860 1857 struct mr_table *mrt;
b1879204
IJ
1858
1859 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1860 goto drop;
1861
1862 pim = igmp_hdr(skb);
1863
f0ad0860
PM
1864 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1865 goto drop;
1866
0c12295a 1867 if (!mrt->mroute_do_pim ||
b1879204
IJ
1868 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1869 goto drop;
1870
f0ad0860 1871 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
1872drop:
1873 kfree_skb(skb);
1874 }
1da177e4
LT
1875 return 0;
1876}
1877#endif
1878
1879#ifdef CONFIG_IP_PIMSM_V2
1880static int pim_rcv(struct sk_buff * skb)
1881{
1882 struct pimreghdr *pim;
f0ad0860
PM
1883 struct net *net = dev_net(skb->dev);
1884 struct mr_table *mrt;
1da177e4 1885
b1879204 1886 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1da177e4
LT
1887 goto drop;
1888
9c70220b 1889 pim = (struct pimreghdr *)skb_transport_header(skb);
e905a9ed 1890 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1da177e4 1891 (pim->flags&PIM_NULL_REGISTER) ||
e905a9ed 1892 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
d3bc23e7 1893 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1da177e4
LT
1894 goto drop;
1895
f0ad0860
PM
1896 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1897 goto drop;
1898
1899 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
1900drop:
1901 kfree_skb(skb);
1902 }
1da177e4
LT
1903 return 0;
1904}
1905#endif
1906
cb6a4e46
PM
1907static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1908 struct mfc_cache *c, struct rtmsg *rtm)
1da177e4
LT
1909{
1910 int ct;
1911 struct rtnexthop *nhp;
27a884dc 1912 u8 *b = skb_tail_pointer(skb);
1da177e4
LT
1913 struct rtattr *mp_head;
1914
7438189b 1915 /* If cache is unresolved, don't try to parse IIF and OIF */
ed0f160a 1916 if (c->mfc_parent >= MAXVIFS)
7438189b
ND
1917 return -ENOENT;
1918
0c12295a
PM
1919 if (VIF_EXISTS(mrt, c->mfc_parent))
1920 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1da177e4 1921
c354e124 1922 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1da177e4
LT
1923
1924 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
0c12295a 1925 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1da177e4
LT
1926 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1927 goto rtattr_failure;
c354e124 1928 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1da177e4
LT
1929 nhp->rtnh_flags = 0;
1930 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
0c12295a 1931 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1da177e4
LT
1932 nhp->rtnh_len = sizeof(*nhp);
1933 }
1934 }
1935 mp_head->rta_type = RTA_MULTIPATH;
27a884dc 1936 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1da177e4
LT
1937 rtm->rtm_type = RTN_MULTICAST;
1938 return 1;
1939
1940rtattr_failure:
dc5fc579 1941 nlmsg_trim(skb, b);
1da177e4
LT
1942 return -EMSGSIZE;
1943}
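/* Sketch of the rtnetlink payload __ipmr_fill_mroute() appends for
 * a resolved cache entry:
 *
 *   RTA_IIF        ifindex of the input vif (c->mfc_parent)
 *   RTA_MULTIPATH  an array of struct rtnexthop, one per output
 *                  vif whose TTL threshold is below 255; rtnh_hops
 *                  is reused to carry that threshold rather than a
 *                  real hop count.
 */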
1944
4feb88e5
BT
1945int ipmr_get_route(struct net *net,
1946 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1da177e4
LT
1947{
1948 int err;
f0ad0860 1949 struct mr_table *mrt;
1da177e4 1950 struct mfc_cache *cache;
511c3f92 1951 struct rtable *rt = skb_rtable(skb);
1da177e4 1952
f0ad0860
PM
1953 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1954 if (mrt == NULL)
1955 return -ENOENT;
1956
1da177e4 1957 read_lock(&mrt_lock);
0c12295a 1958 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1da177e4 1959
c354e124 1960 if (cache == NULL) {
72287490 1961 struct sk_buff *skb2;
eddc9ec5 1962 struct iphdr *iph;
1da177e4
LT
1963 struct net_device *dev;
1964 int vif;
1965
1966 if (nowait) {
1967 read_unlock(&mrt_lock);
1968 return -EAGAIN;
1969 }
1970
1971 dev = skb->dev;
0c12295a 1972 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1da177e4
LT
1973 read_unlock(&mrt_lock);
1974 return -ENODEV;
1975 }
72287490
AK
1976 skb2 = skb_clone(skb, GFP_ATOMIC);
1977 if (!skb2) {
1978 read_unlock(&mrt_lock);
1979 return -ENOMEM;
1980 }
1981
e2d1bca7
ACM
1982 skb_push(skb2, sizeof(struct iphdr));
1983 skb_reset_network_header(skb2);
eddc9ec5
ACM
1984 iph = ip_hdr(skb2);
1985 iph->ihl = sizeof(struct iphdr) >> 2;
1986 iph->saddr = rt->rt_src;
1987 iph->daddr = rt->rt_dst;
1988 iph->version = 0;
0c12295a 1989 err = ipmr_cache_unresolved(mrt, vif, skb2);
1da177e4
LT
1990 read_unlock(&mrt_lock);
1991 return err;
1992 }
1993
1994 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1995 cache->mfc_flags |= MFC_NOTIFY;
cb6a4e46 1996 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1da177e4
LT
1997 read_unlock(&mrt_lock);
1998 return err;
1999}
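/* On a cache miss above, the queued clone carries a hand-built
 * pseudo IP header whose version field is 0. That impossible
 * version is the marker by which the resolution code can later
 * tell a netlink-originated request apart from a genuinely
 * forwarded packet and answer it with a route reply instead of
 * transmitting it.
 */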
2000
cb6a4e46
PM
2001static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2002 u32 pid, u32 seq, struct mfc_cache *c)
2003{
2004 struct nlmsghdr *nlh;
2005 struct rtmsg *rtm;
2006
2007 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2008 if (nlh == NULL)
2009 return -EMSGSIZE;
2010
2011 rtm = nlmsg_data(nlh);
2012 rtm->rtm_family = RTNL_FAMILY_IPMR;
2013 rtm->rtm_dst_len = 32;
2014 rtm->rtm_src_len = 32;
2015 rtm->rtm_tos = 0;
2016 rtm->rtm_table = mrt->id;
2017 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2018 rtm->rtm_type = RTN_MULTICAST;
2019 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2020 rtm->rtm_protocol = RTPROT_UNSPEC;
2021 rtm->rtm_flags = 0;
2022
2023 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2024 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2025
2026 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2027 goto nla_put_failure;
2028
2029 return nlmsg_end(skb, nlh);
2030
2031nla_put_failure:
2032 nlmsg_cancel(skb, nlh);
2033 return -EMSGSIZE;
2034}
2035
2036static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2037{
2038 struct net *net = sock_net(skb->sk);
2039 struct mr_table *mrt;
2040 struct mfc_cache *mfc;
2041 unsigned int t = 0, s_t;
2042 unsigned int h = 0, s_h;
2043 unsigned int e = 0, s_e;
2044
2045 s_t = cb->args[0];
2046 s_h = cb->args[1];
2047 s_e = cb->args[2];
2048
2049 read_lock(&mrt_lock);
2050 ipmr_for_each_table(mrt, net) {
2051 if (t < s_t)
2052 goto next_table;
2053 if (t > s_t)
2054 s_h = 0;
2055 for (h = s_h; h < MFC_LINES; h++) {
2056 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2057 if (e < s_e)
2058 goto next_entry;
2059 if (ipmr_fill_mroute(mrt, skb,
2060 NETLINK_CB(cb->skb).pid,
2061 cb->nlh->nlmsg_seq,
2062 mfc) < 0)
2063 goto done;
2064next_entry:
2065 e++;
2066 }
2067 e = s_e = 0;
2068 }
2069 s_h = 0;
2070next_table:
2071 t++;
2072 }
2073done:
2074 read_unlock(&mrt_lock);
2075
2076 cb->args[2] = e;
2077 cb->args[1] = h;
2078 cb->args[0] = t;
2079
2080 return skb->len;
2081}
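/* Dump resumption: one dump may span several recvmsg() calls, so
 * the position is kept in the callback as a three-level cursor,
 * cb->args[0] the table, cb->args[1] the hash line, cb->args[2]
 * the entry within that line, and the loops above skip forward to
 * (s_t, s_h, s_e) before emitting anything new.
 */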
2082
e905a9ed 2083#ifdef CONFIG_PROC_FS
1da177e4
LT
2084/*
2085 * The /proc interfaces to multicast routing: /proc/ip_mr_cache and /proc/ip_mr_vif
2086 */
2087struct ipmr_vif_iter {
f6bb4514 2088 struct seq_net_private p;
f0ad0860 2089 struct mr_table *mrt;
1da177e4
LT
2090 int ct;
2091};
2092
f6bb4514
BT
2093static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2094 struct ipmr_vif_iter *iter,
1da177e4
LT
2095 loff_t pos)
2096{
f0ad0860 2097 struct mr_table *mrt = iter->mrt;
0c12295a
PM
2098
2099 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2100 if (!VIF_EXISTS(mrt, iter->ct))
1da177e4 2101 continue;
e905a9ed 2102 if (pos-- == 0)
0c12295a 2103 return &mrt->vif_table[iter->ct];
1da177e4
LT
2104 }
2105 return NULL;
2106}
2107
2108static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
ba93ef74 2109 __acquires(mrt_lock)
1da177e4 2110{
f0ad0860 2111 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 2112 struct net *net = seq_file_net(seq);
f0ad0860
PM
2113 struct mr_table *mrt;
2114
2115 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2116 if (mrt == NULL)
2117 return ERR_PTR(-ENOENT);
2118
2119 iter->mrt = mrt;
f6bb4514 2120
1da177e4 2121 read_lock(&mrt_lock);
f6bb4514 2122 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
2123 : SEQ_START_TOKEN;
2124}
2125
2126static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2127{
2128 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 2129 struct net *net = seq_file_net(seq);
f0ad0860 2130 struct mr_table *mrt = iter->mrt;
1da177e4
LT
2131
2132 ++*pos;
2133 if (v == SEQ_START_TOKEN)
f6bb4514 2134 return ipmr_vif_seq_idx(net, iter, 0);
e905a9ed 2135
0c12295a
PM
2136 while (++iter->ct < mrt->maxvif) {
2137 if (!VIF_EXISTS(mrt, iter->ct))
1da177e4 2138 continue;
0c12295a 2139 return &mrt->vif_table[iter->ct];
1da177e4
LT
2140 }
2141 return NULL;
2142}
2143
2144static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
ba93ef74 2145 __releases(mrt_lock)
1da177e4
LT
2146{
2147 read_unlock(&mrt_lock);
2148}
2149
2150static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2151{
f0ad0860
PM
2152 struct ipmr_vif_iter *iter = seq->private;
2153 struct mr_table *mrt = iter->mrt;
f6bb4514 2154
1da177e4 2155 if (v == SEQ_START_TOKEN) {
e905a9ed 2156 seq_puts(seq,
1da177e4
LT
2157 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2158 } else {
2159 const struct vif_device *vif = v;
2160 const char *name = vif->dev ? vif->dev->name : "none";
2161
2162 seq_printf(seq,
2163 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
0c12295a 2164 vif - mrt->vif_table,
e905a9ed 2165 name, vif->bytes_in, vif->pkt_in,
1da177e4
LT
2166 vif->bytes_out, vif->pkt_out,
2167 vif->flags, vif->local, vif->remote);
2168 }
2169 return 0;
2170}
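/* Illustrative /proc/net/ip_mr_vif output (all values made up):
 *
 * Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *  0 pimreg            0       0         0       0 00004 00000000 00000000
 *  1 eth0         144282    1201    625904    3120 00000 0101A8C0 00000000
 */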
2171
f690808e 2172static const struct seq_operations ipmr_vif_seq_ops = {
1da177e4
LT
2173 .start = ipmr_vif_seq_start,
2174 .next = ipmr_vif_seq_next,
2175 .stop = ipmr_vif_seq_stop,
2176 .show = ipmr_vif_seq_show,
2177};
2178
2179static int ipmr_vif_open(struct inode *inode, struct file *file)
2180{
f6bb4514
BT
2181 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2182 sizeof(struct ipmr_vif_iter));
1da177e4
LT
2183}
2184
9a32144e 2185static const struct file_operations ipmr_vif_fops = {
1da177e4
LT
2186 .owner = THIS_MODULE,
2187 .open = ipmr_vif_open,
2188 .read = seq_read,
2189 .llseek = seq_lseek,
f6bb4514 2190 .release = seq_release_net,
1da177e4
LT
2191};
2192
2193struct ipmr_mfc_iter {
f6bb4514 2194 struct seq_net_private p;
f0ad0860 2195 struct mr_table *mrt;
862465f2 2196 struct list_head *cache;
1da177e4
LT
2197 int ct;
2198};
2199
2200
f6bb4514
BT
2201static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2202 struct ipmr_mfc_iter *it, loff_t pos)
1da177e4 2203{
f0ad0860 2204 struct mr_table *mrt = it->mrt;
1da177e4
LT
2205 struct mfc_cache *mfc;
2206
1da177e4 2207 read_lock(&mrt_lock);
862465f2 2208 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
0c12295a 2209 it->cache = &mrt->mfc_cache_array[it->ct];
862465f2 2210 list_for_each_entry(mfc, it->cache, list)
e905a9ed 2211 if (pos-- == 0)
1da177e4 2212 return mfc;
862465f2 2213 }
1da177e4
LT
2214 read_unlock(&mrt_lock);
2215
1da177e4 2216 spin_lock_bh(&mfc_unres_lock);
0c12295a 2217 it->cache = &mrt->mfc_unres_queue;
862465f2 2218 list_for_each_entry(mfc, it->cache, list)
e258beb2 2219 if (pos-- == 0)
1da177e4
LT
2220 return mfc;
2221 spin_unlock_bh(&mfc_unres_lock);
2222
2223 it->cache = NULL;
2224 return NULL;
2225}
2226
2227
2228static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2229{
2230 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 2231 struct net *net = seq_file_net(seq);
f0ad0860 2232 struct mr_table *mrt;
f6bb4514 2233
f0ad0860
PM
2234 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2235 if (mrt == NULL)
2236 return ERR_PTR(-ENOENT);
f6bb4514 2237
f0ad0860 2238 it->mrt = mrt;
1da177e4
LT
2239 it->cache = NULL;
2240 it->ct = 0;
f6bb4514 2241 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
2242 : SEQ_START_TOKEN;
2243}
2244
2245static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2246{
2247 struct mfc_cache *mfc = v;
2248 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 2249 struct net *net = seq_file_net(seq);
f0ad0860 2250 struct mr_table *mrt = it->mrt;
1da177e4
LT
2251
2252 ++*pos;
2253
2254 if (v == SEQ_START_TOKEN)
f6bb4514 2255 return ipmr_mfc_seq_idx(net, seq->private, 0);
1da177e4 2256
862465f2
PM
2257 if (mfc->list.next != it->cache)
2258 return list_entry(mfc->list.next, struct mfc_cache, list);
e905a9ed 2259
0c12295a 2260 if (it->cache == &mrt->mfc_unres_queue)
1da177e4
LT
2261 goto end_of_list;
2262
0c12295a 2263 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
1da177e4
LT
2264
2265 while (++it->ct < MFC_LINES) {
0c12295a 2266 it->cache = &mrt->mfc_cache_array[it->ct];
862465f2
PM
2267 if (list_empty(it->cache))
2268 continue;
2269 return list_first_entry(it->cache, struct mfc_cache, list);
1da177e4
LT
2270 }
2271
2272 /* exhausted cache_array, show unresolved */
2273 read_unlock(&mrt_lock);
0c12295a 2274 it->cache = &mrt->mfc_unres_queue;
1da177e4 2275 it->ct = 0;
e905a9ed 2276
1da177e4 2277 spin_lock_bh(&mfc_unres_lock);
862465f2
PM
2278 if (!list_empty(it->cache))
2279 return list_first_entry(it->cache, struct mfc_cache, list);
1da177e4
LT
2280
2281 end_of_list:
2282 spin_unlock_bh(&mfc_unres_lock);
2283 it->cache = NULL;
2284
2285 return NULL;
2286}
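/* The iterator walks two differently-protected lists in a single
 * pass: mfc_cache_array under mrt_lock, then mfc_unres_queue under
 * mfc_unres_lock, dropping the first lock before taking the second.
 * ipmr_mfc_seq_stop() below checks it->cache to work out which of
 * the two locks is still held and releases only that one.
 */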
2287
2288static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2289{
2290 struct ipmr_mfc_iter *it = seq->private;
f0ad0860 2291 struct mr_table *mrt = it->mrt;
1da177e4 2292
0c12295a 2293 if (it->cache == &mrt->mfc_unres_queue)
1da177e4 2294 spin_unlock_bh(&mfc_unres_lock);
0c12295a 2295 else if (it->cache == &mrt->mfc_cache_array[it->ct])
1da177e4
LT
2296 read_unlock(&mrt_lock);
2297}
2298
2299static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2300{
2301 int n;
2302
2303 if (v == SEQ_START_TOKEN) {
e905a9ed 2304 seq_puts(seq,
1da177e4
LT
2305 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2306 } else {
2307 const struct mfc_cache *mfc = v;
2308 const struct ipmr_mfc_iter *it = seq->private;
f0ad0860 2309 const struct mr_table *mrt = it->mrt;
e905a9ed 2310
0eae88f3
ED
2311 seq_printf(seq, "%08X %08X %-3hd",
2312 (__force u32) mfc->mfc_mcastgrp,
2313 (__force u32) mfc->mfc_origin,
1ea472e2 2314 mfc->mfc_parent);
1da177e4 2315
0c12295a 2316 if (it->cache != &mrt->mfc_unres_queue) {
1ea472e2
BT
2317 seq_printf(seq, " %8lu %8lu %8lu",
2318 mfc->mfc_un.res.pkt,
2319 mfc->mfc_un.res.bytes,
2320 mfc->mfc_un.res.wrong_if);
132adf54
SH
2321 for (n = mfc->mfc_un.res.minvif;
2322 n < mfc->mfc_un.res.maxvif; n++ ) {
0c12295a 2323 if (VIF_EXISTS(mrt, n) &&
cf958ae3
BT
2324 mfc->mfc_un.res.ttls[n] < 255)
2325 seq_printf(seq,
e905a9ed 2326 " %2d:%-3d",
1da177e4
LT
2327 n, mfc->mfc_un.res.ttls[n]);
2328 }
1ea472e2
BT
2329 } else {
2330 /* unresolved mfc_caches don't contain
2331 * pkt, bytes and wrong_if values
2332 */
2333 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1da177e4
LT
2334 }
2335 seq_putc(seq, '\n');
2336 }
2337 return 0;
2338}
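/* Illustrative /proc/net/ip_mr_cache output (all values made up):
 *
 * Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 * 010101E1 0101A8C0 0           12     1440        0  1:1   2:1
 * 020202E2 0101A8C0 -1           0        0        0
 *
 * An Iif of -1 with zeroed counters and no Oifs list denotes an
 * entry still sitting on the unresolved queue.
 */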
2339
f690808e 2340static const struct seq_operations ipmr_mfc_seq_ops = {
1da177e4
LT
2341 .start = ipmr_mfc_seq_start,
2342 .next = ipmr_mfc_seq_next,
2343 .stop = ipmr_mfc_seq_stop,
2344 .show = ipmr_mfc_seq_show,
2345};
2346
2347static int ipmr_mfc_open(struct inode *inode, struct file *file)
2348{
f6bb4514
BT
2349 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2350 sizeof(struct ipmr_mfc_iter));
1da177e4
LT
2351}
2352
9a32144e 2353static const struct file_operations ipmr_mfc_fops = {
1da177e4
LT
2354 .owner = THIS_MODULE,
2355 .open = ipmr_mfc_open,
2356 .read = seq_read,
2357 .llseek = seq_lseek,
f6bb4514 2358 .release = seq_release_net,
1da177e4 2359};
e905a9ed 2360#endif
1da177e4
LT
2361
2362#ifdef CONFIG_IP_PIMSM_V2
32613090 2363static const struct net_protocol pim_protocol = {
1da177e4 2364 .handler = pim_rcv,
403dbb97 2365 .netns_ok = 1,
1da177e4
LT
2366};
2367#endif
2368
2369
2370/*
2371 * Setup for IP multicast routing
2372 */
cf958ae3
BT
2373static int __net_init ipmr_net_init(struct net *net)
2374{
f0ad0860 2375 int err;
cf958ae3 2376
f0ad0860
PM
2377 err = ipmr_rules_init(net);
2378 if (err < 0)
cf958ae3 2379 goto fail;
f6bb4514
BT
2380
2381#ifdef CONFIG_PROC_FS
2382 err = -ENOMEM;
2383 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2384 goto proc_vif_fail;
2385 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2386 goto proc_cache_fail;
2387#endif
2bb8b26c
BT
2388 return 0;
2389
f6bb4514
BT
2390#ifdef CONFIG_PROC_FS
2391proc_cache_fail:
2392 proc_net_remove(net, "ip_mr_vif");
2393proc_vif_fail:
f0ad0860 2394 ipmr_rules_exit(net);
f6bb4514 2395#endif
cf958ae3
BT
2396fail:
2397 return err;
2398}
2399
2400static void __net_exit ipmr_net_exit(struct net *net)
2401{
f6bb4514
BT
2402#ifdef CONFIG_PROC_FS
2403 proc_net_remove(net, "ip_mr_cache");
2404 proc_net_remove(net, "ip_mr_vif");
2405#endif
f0ad0860 2406 ipmr_rules_exit(net);
cf958ae3
BT
2407}
2408
2409static struct pernet_operations ipmr_net_ops = {
2410 .init = ipmr_net_init,
2411 .exit = ipmr_net_exit,
2412};
e905a9ed 2413
03d2f897 2414int __init ip_mr_init(void)
1da177e4 2415{
03d2f897
WC
2416 int err;
2417
1da177e4
LT
2418 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2419 sizeof(struct mfc_cache),
e5d679f3 2420 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
20c2df83 2421 NULL);
03d2f897
WC
2422 if (!mrt_cachep)
2423 return -ENOMEM;
2424
cf958ae3
BT
2425 err = register_pernet_subsys(&ipmr_net_ops);
2426 if (err)
2427 goto reg_pernet_fail;
2428
03d2f897
WC
2429 err = register_netdevice_notifier(&ip_mr_notifier);
2430 if (err)
2431 goto reg_notif_fail;
403dbb97
TG
2432#ifdef CONFIG_IP_PIMSM_V2
2433 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2434 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2435 err = -EAGAIN;
2436 goto add_proto_fail;
2437 }
2438#endif
cb6a4e46 2439 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
03d2f897 2440 return 0;
f6bb4514 2441
403dbb97
TG
2442#ifdef CONFIG_IP_PIMSM_V2
2443add_proto_fail:
2444 unregister_netdevice_notifier(&ip_mr_notifier);
2445#endif
c3e38896 2446reg_notif_fail:
cf958ae3
BT
2447 unregister_pernet_subsys(&ipmr_net_ops);
2448reg_pernet_fail:
c3e38896 2449 kmem_cache_destroy(mrt_cachep);
03d2f897 2450 return err;
1da177e4 2451}