/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

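/* Per-table state: one mr_table exists per multicast routing table id
 * (a single default instance unless CONFIG_IP_MROUTE_MULTIPLE_TABLES is
 * set). It bundles the owning mroute socket, the VIF table, the hash of
 * resolved MFC entries and the queue of unresolved ones.
 */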
struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

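/* Find the table with the given id on the per-namespace list,
 * using the RCU-safe iterator defined above.
 */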
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

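/* Per-namespace setup: register the IPMR fib-rules ops, create the
 * default table and install a catch-all rule pointing at it.
 */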
static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
		kfree(mrt);
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

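/* Look up a table by id, allocating and initialising a new one
 * (forwarding cache, unresolved queue, expiry timer) if it does
 * not exist yet.
 */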
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

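/* Transmit handler of the PIM register device: anything sent through
 * it is bounced to the user-space daemon as an IGMPMSG_WHOLEPKT upcall
 * and the original skb is freed.
 */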
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0)
		return err;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

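/* Allocate and register the PIM register device for a table:
 * "pimreg" for the default table, "pimreg<id>" otherwise.
 */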
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

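/* Install a new virtual interface described by @vifc. Depending on the
 * flags this creates a PIM register device, a DVMRP tunnel, or binds an
 * existing device found by ifindex or local address.
 */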
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

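/* Look up a resolved (origin, group) entry in the MFC hash.
 * Runs under mrt_lock.
 */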
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));

		/*
		 *	Add our header
		 */

		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type = msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

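/* Destructor of the mroute socket: called on close, it drops the
 * MC_FORWARDING state and cleans every table owned by this socket.
 */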
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

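/* netdevice notifier: on NETDEV_UNREGISTER, drop every VIF that was
 * bound to the disappearing device, in all tables.
 */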
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

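/* Output path tail for forwarded packets: account the datagram,
 * handle IP options and hand the skb to dst_output().
 */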
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

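/* Map a device back to its VIF index in @mrt, or -1 if the
 * device is not a VIF.
 */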
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0)
		return err;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
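/* Common part of PIMv1/PIMv2 REGISTER processing: validate the inner
 * packet and re-inject it through the pimreg device. Returns 0 if the
 * skb was consumed, 1 if the caller should free it.
 */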
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

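/* Append the RTA_IIF attribute and the RTA_MULTIPATH nexthop list for
 * an MFC entry to a netlink skb; shared by the route get/dump paths
 * and by the replay of queued packets in ipmr_cache_resolve().
 */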
cb6a4e46
PM
1905static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1906 struct mfc_cache *c, struct rtmsg *rtm)
1da177e4
LT
1907{
1908 int ct;
1909 struct rtnexthop *nhp;
27a884dc 1910 u8 *b = skb_tail_pointer(skb);
1da177e4
LT
1911 struct rtattr *mp_head;
1912
7438189b 1913 /* If cache is unresolved, don't try to parse IIF and OIF */
ed0f160a 1914 if (c->mfc_parent >= MAXVIFS)
7438189b
ND
1915 return -ENOENT;
1916
0c12295a
PM
1917 if (VIF_EXISTS(mrt, c->mfc_parent))
1918 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1da177e4 1919
c354e124 1920 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1da177e4
LT
1921
1922 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
0c12295a 1923 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1da177e4
LT
1924 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1925 goto rtattr_failure;
c354e124 1926 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1da177e4
LT
1927 nhp->rtnh_flags = 0;
1928 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
0c12295a 1929 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1da177e4
LT
1930 nhp->rtnh_len = sizeof(*nhp);
1931 }
1932 }
1933 mp_head->rta_type = RTA_MULTIPATH;
27a884dc 1934 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1da177e4
LT
1935 rtm->rtm_type = RTN_MULTICAST;
1936 return 1;
1937
1938rtattr_failure:
dc5fc579 1939 nlmsg_trim(skb, b);
1da177e4
LT
1940 return -EMSGSIZE;
1941}
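
The OIF set is thus encoded as a single RTA_MULTIPATH attribute holding an array of struct rtnexthop, with the forwarding TTL threshold carried in rtnh_hops. A hedged userspace sketch of walking such an attribute with the standard rtnetlink macros; 'mp' is assumed to have been located in a parsed message by the caller:

#include <stdio.h>
#include <linux/rtnetlink.h>

/* Print each output interface and its TTL threshold from an
 * RTA_MULTIPATH attribute built by __ipmr_fill_mroute(). */
static void print_mfc_oifs(const struct rtattr *mp)
{
	const struct rtnexthop *nhp = RTA_DATA(mp);
	int len = RTA_PAYLOAD(mp);

	while (RTNH_OK(nhp, len)) {
		printf("oif %d ttl-threshold %d\n",
		       nhp->rtnh_ifindex, nhp->rtnh_hops);
		len -= NLMSG_ALIGN(nhp->rtnh_len);
		nhp = RTNH_NEXT(nhp);
	}
}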

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		/* Build a minimal pseudo IP header; version 0 marks the
		 * clone as a locally generated resolution request rather
		 * than a forwarded packet. */
		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len = 32;
	rtm->rtm_src_len = 32;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	/* cb->args[] carries the table/hash-line/entry position so that
	 * an interrupted dump can resume where it left off. */
	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
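
This handler serves RTM_GETROUTE dumps for RTNL_FAMILY_IPMR, which is how `ip mroute show` reads the multicast FIB. A minimal, hedged userspace sketch of issuing such a dump; reply parsing and error handling are elided:

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

/* Ask the kernel to dump the IPv4 multicast FIB. */
static int mroute_dump_request(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.rtm.rtm_family = RTNL_FAMILY_IPMR;	/* multicast, not unicast */

	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
		close(fd);
		return -1;
	}
	/* recv() the RTM_NEWROUTE multipart replies here ... */
	return fd;
}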

#ifdef CONFIG_PROC_FS
/*
 * The /proc interfaces to multicast routing:
 * /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, iter, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}
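
Rendered through the formats above, /proc/net/ip_mr_vif looks roughly like this. The values are illustrative, reconstructed from the seq_printf() format strings rather than captured from a live system (flags 00004 would be VIFF_REGISTER):

/*
 * Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *  0 eth0         123456     789    123456     789 00000 0100000A 00000000
 *  1 pimreg            0       0         0       0 00004 00000000 00000000
 */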

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next = ipmr_vif_seq_next,
	.stop = ipmr_vif_seq_stop,
	.show = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner = THIS_MODULE,
	.open = ipmr_vif_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, it, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, it, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
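
Likewise, /proc/net/ip_mr_cache would come out roughly as below. This is illustrative only: addresses print as raw hex in the byte order they are stored, and an unresolved entry shows Iif -1 with zeroed counters:

/*
 * Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 * 010101E0 0100000A  1      1234   567890        0  2:1    3:64
 * 020202E0 0200000A -1         0        0        0
 */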

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next = ipmr_mfc_seq_next,
	.stop = ipmr_mfc_seq_stop,
	.show = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner = THIS_MODULE,
	.open = ipmr_mfc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler = pim_rcv,
	.netns_ok = 1,
};
#endif


/*
 * Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
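
Note that ip_mr_init() has no module_init() hook of its own; it runs during IPv4 stack bring-up. A hedged sketch of the call site, believed to live in inet_init() in net/ipv4/af_inet.c:

#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif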