/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

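/* Editorial note: one mr_table exists per multicast routing table. Without
 * CONFIG_IP_MROUTE_MULTIPLE_TABLES only the RT_TABLE_DEFAULT instance is
 * ever created (see ipmr_rules_init() below).
 */
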
struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

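/* Editorial summary of the locking discipline, as used throughout this
 * file: the data path (ip_mr_input(), ipmr_get_route()) takes
 * read_lock(&mrt_lock); configuration (vif_add(), ipmr_mfc_add(), ...)
 * runs under rtnl_lock and takes write_lock_bh(&mrt_lock) only around the
 * actual table updates; the unresolved queue is always manipulated under
 * spin_lock_bh(&mfc_unres_lock).
 */
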
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static struct fib_rules_ops ipmr_rules_ops_template = {
	.family		= FIB_RULES_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
		kfree(mrt);
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

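/* Editorial note: both tunnel helpers below drive the generic "tunl0"
 * device ioctl from kernel context. set_fs(KERNEL_DS) temporarily lifts
 * the user-copy address check so ndo_do_ioctl() can read the
 * ip_tunnel_parm from a kernel stack buffer passed via ifr_ifru.ifru_data.
 */
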
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

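/* Editorial note: the pimreg device created below is a software-only
 * interface. Anything the stack transmits on it is not sent anywhere but
 * bounced back to the daemon as an IGMPMSG_WHOLEPKT upcall, since PIM
 * register processing lives in userspace.
 */
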
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0)
		return err;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

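/* Illustrative example (editorial, not from the original source; it assumes
 * all three vifs exist): with maxvif == 3 and ttls = { 0, 2, 255 }, only
 * vif 1 qualifies (0 means "not an output interface", 255 means
 * unreachable), so ttls[1] = 2, minvif = 1 and maxvif = 2; ip_mr_forward()
 * then scans only vif 1 and forwards there when the packet TTL is greater
 * than 2.
 */
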
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

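/* Editorial note: resolved-cache lookup below hashes on the
 * (group, origin) pair via MFC_HASH() into one of MFC_LINES buckets;
 * callers walk the bucket under read_lock(&mrt_lock).
 */
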
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

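/* Editorial note on the upcall layout, as implied by the code below:
 * struct igmpmsg overlays the IP header that is copied into the report
 * skb, so im_msgtype and im_mbz reuse the ttl and protocol bytes while
 * im_src/im_dst coincide with saddr/daddr. The daemon recognises an
 * upcall by the zeroed protocol field, and for IGMPMSG_NOCACHE and
 * IGMPMSG_WRONGVIF a fake struct igmphdr follows carrying the assert type.
 */
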
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

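/* Illustrative userspace sequence (an editorial sketch inferred from the
 * checks in ip_mroute_setsockopt() below; this snippet is not part of the
 * kernel file):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	... MRT_ADD_VIF / MRT_ADD_MFC configuration; IGMPMSG_* upcalls
 *	    arrive as packets read from s ...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */
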
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP, so the packets will disappear
		   into a blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicasting program,
	 * the program should receive packets regardless of the interface it
	 * has joined on. If we did not do this, the program would have to
	 * join on all interfaces. On the other hand, a multihoming host (or
	 * a router, but not an mrouter) cannot join on more than one
	 * interface - it would result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

1622/* "local" means that we should preserve one skb (for local delivery) */
1623
0c12295a
PM
1624static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1625 struct sk_buff *skb, struct mfc_cache *cache,
1626 int local)
1da177e4
LT
1627{
1628 int psend = -1;
1629 int vif, ct;
1630
1631 vif = cache->mfc_parent;
1632 cache->mfc_un.res.pkt++;
1633 cache->mfc_un.res.bytes += skb->len;
1634
1635 /*
1636 * Wrong interface: drop packet and (maybe) send PIM assert.
1637 */
0c12295a 1638 if (mrt->vif_table[vif].dev != skb->dev) {
1da177e4
LT
1639 int true_vifi;
1640
511c3f92 1641 if (skb_rtable(skb)->fl.iif == 0) {
1da177e4
LT
1642 /* It is our own packet, looped back.
1643 Very complicated situation...
1644
1645 The best workaround until routing daemons will be
1646 fixed is not to redistribute packet, if it was
1647 send through wrong interface. It means, that
1648 multicast applications WILL NOT work for
1649 (S,G), which have default multicast route pointing
1650 to wrong oif. In any case, it is not a good
1651 idea to use multicasting applications on router.
1652 */
1653 goto dont_forward;
1654 }
1655
1656 cache->mfc_un.res.wrong_if++;
0c12295a 1657 true_vifi = ipmr_find_vif(mrt, skb->dev);
1da177e4 1658
0c12295a 1659 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1da177e4
LT
1660 /* pimsm uses asserts, when switching from RPT to SPT,
1661 so that we cannot check that packet arrived on an oif.
1662 It is bad, but otherwise we would need to move pretty
1663 large chunk of pimd to kernel. Ough... --ANK
1664 */
0c12295a 1665 (mrt->mroute_do_pim ||
6f9374a9 1666 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 1667 time_after(jiffies,
1da177e4
LT
1668 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1669 cache->mfc_un.res.last_assert = jiffies;
0c12295a 1670 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
1671 }
1672 goto dont_forward;
1673 }
1674
0c12295a
PM
1675 mrt->vif_table[vif].pkt_in++;
1676 mrt->vif_table[vif].bytes_in += skb->len;
1da177e4
LT
1677
1678 /*
1679 * Forward the frame
1680 */
1681 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
eddc9ec5 1682 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
1683 if (psend != -1) {
1684 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1685 if (skb2)
0c12295a
PM
1686 ipmr_queue_xmit(net, mrt, skb2, cache,
1687 psend);
1da177e4 1688 }
c354e124 1689 psend = ct;
1da177e4
LT
1690 }
1691 }
1692 if (psend != -1) {
1693 if (local) {
1694 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1695 if (skb2)
0c12295a 1696 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1da177e4 1697 } else {
0c12295a 1698 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1da177e4
LT
1699 return 0;
1700 }
1701 }
1702
1703dont_forward:
1704 if (!local)
1705 kfree_skb(skb);
1706 return 0;
1707}
1708
1709
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	   forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0)
		return err;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations, such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
		 struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

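/*
 * Answer an RTM_GETROUTE query for a multicast flow. If no cache entry
 * exists yet, a clone of the skb is queued for resolution by the
 * userspace daemon (unless the caller passed nowait).
 */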
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

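		/*
		 * Build a minimal IP header on the clone before queueing
		 * it; version 0 lets ipmr_cache_resolve() tell this
		 * netlink request apart from a queued data packet.
		 */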
		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 * The /proc interfaces to multicast routing:
 * /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

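/*
 * The vif iterator holds mrt_lock from ->start to ->stop, so the vif
 * table cannot change during one read of /proc/net/ip_mr_vif.
 */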
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, iter, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next = ipmr_vif_seq_next,
	.stop = ipmr_vif_seq_stop,
	.show = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner = THIS_MODULE,
	.open = ipmr_vif_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

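/*
 * The MFC iterator walks two lists in turn: the resolved hash buckets
 * under mrt_lock, then the unresolved queue under mfc_unres_lock;
 * ->stop releases whichever lock is still held.
 */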
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, it, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, it, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next = ipmr_mfc_seq_next,
	.stop = ipmr_mfc_seq_stop,
	.show = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner = THIS_MODULE,
	.open = ipmr_mfc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
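/* pim_rcv() is hooked up to IPPROTO_PIM in ip_mr_init() below. */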
static const struct net_protocol pim_protocol = {
	.handler = pim_rcv,
	.netns_ok = 1,
};
#endif


/*
 * Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

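/*
 * Module init: create the mfc cache slab, register the per-netns
 * hooks, the netdevice notifier and, when PIMv2 is configured, the
 * IPPROTO_PIM protocol handler. Each failure label unwinds exactly
 * the steps that had already succeeded.
 */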
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}