/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

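/* Editorial note (not in the original source): one mr_table is one complete
 * multicast routing instance - its own mroute socket, vif table, resolved
 * cache and unresolved queue.  Plain kernels have a single instance per
 * network namespace; with CONFIG_IP_MROUTE_MULTIPLE_TABLES the instances
 * live on net->ipv4.mr_tables and are selected per packet by fib rules
 * (see ipmr_fib_lookup() below).
 */
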
struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

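/* Editorial usage sketch (not in the original source, and assuming an
 * iproute2 build with multicast-rule support): once a daemon binds itself
 * to a non-default table with the MRT_TABLE socket option, packets can be
 * steered to that table with a policy rule, e.g.:
 *
 *	ip mrule add iif eth1 table 42
 *
 * ipmr_fib_lookup() above then resolves such rules to the mr_table that
 * should handle the packet.
 */
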
static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
		kfree(mrt);
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0)
		return err;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

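/* Editorial worked example (not in the original source): for an entry
 * configured with ttls = {2, 0, 255, 1} and mrt->maxvif == 4, only vifs 0
 * and 3 get a real threshold (0 means "do not forward" and 255 is the
 * "unreachable" sentinel), so minvif becomes 0 and maxvif becomes 4.  The
 * forwarding loop in ip_mr_forward() later sends on a vif only when the
 * packet's TTL exceeds the stored threshold.
 */
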
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

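/* Editorial sketch of the upcall format (not in the original source): the
 * daemon reads these messages from its raw IGMP socket.  For
 * IGMPMSG_NOCACHE/IGMPMSG_WRONGVIF the copied IP header doubles as a
 * struct igmpmsg - im_src/im_dst alias the packet's source and group,
 * im_msgtype and im_vif say why and where it arrived - and a bare
 * struct igmphdr with type == im_msgtype follows:
 *
 *	+------------------------------+------------------+
 *	| struct igmpmsg (over iphdr)  |  struct igmphdr  |
 *	+------------------------------+------------------+
 *
 * For IGMPMSG_WHOLEPKT (PIM register vifs) the entire original packet
 * follows the mangled header instead.
 */
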
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

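/* Editorial usage sketch (not part of this file): a routing daemon such as
 * mrouted or pimd typically drives this interface along these lines:
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1, ... };
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	// ...read IGMPMSG_* upcalls from s, answer with MRT_ADD_MFC...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, &on, sizeof(on));
 */
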
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

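/* Editorial usage sketch (not part of this file): SIOCGETSGCNT lets a
 * daemon poll per-(S,G) counters on its mroute socket, e.g.:
 *
 *	struct sioc_sg_req sr = { .src = src_addr, .grp = grp_addr };
 *	if (ioctl(s, SIOCGETSGCNT, &sr) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 */
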
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

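/* Editorial sketch (not in the original source): after ip_encap() the skb
 * carries the original multicast datagram as IPIP payload,
 *
 *	+-------------------------+--------------------------------+
 *	| outer iphdr (proto = 4) | original packet (iphdr + data) |
 *	+-------------------------+--------------------------------+
 *
 * with the outer TOS/TTL copied from the inner header, so the remote
 * tunnel endpoint can decapsulate it with the plain ipip driver.
 */
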
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   into a blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but after forwarding on all
	 * output interfaces. Clearly, if the mrouter runs a multicasting
	 * program, that program should receive packets regardless of which
	 * interface it joined on; otherwise it would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple copies of each packet.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

1623/* "local" means that we should preserve one skb (for local delivery) */
1624
0c12295a
PM
1625static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1626 struct sk_buff *skb, struct mfc_cache *cache,
1627 int local)
1da177e4
LT
1628{
1629 int psend = -1;
1630 int vif, ct;
1631
1632 vif = cache->mfc_parent;
1633 cache->mfc_un.res.pkt++;
1634 cache->mfc_un.res.bytes += skb->len;
1635
1636 /*
1637 * Wrong interface: drop packet and (maybe) send PIM assert.
1638 */
0c12295a 1639 if (mrt->vif_table[vif].dev != skb->dev) {
1da177e4
LT
1640 int true_vifi;
1641
511c3f92 1642 if (skb_rtable(skb)->fl.iif == 0) {
1da177e4
LT
1643 /* It is our own packet, looped back.
1644 Very complicated situation...
1645
1646 The best workaround until routing daemons will be
1647 fixed is not to redistribute packet, if it was
1648 send through wrong interface. It means, that
1649 multicast applications WILL NOT work for
1650 (S,G), which have default multicast route pointing
1651 to wrong oif. In any case, it is not a good
1652 idea to use multicasting applications on router.
1653 */
1654 goto dont_forward;
1655 }
1656
1657 cache->mfc_un.res.wrong_if++;
0c12295a 1658 true_vifi = ipmr_find_vif(mrt, skb->dev);
1da177e4 1659
0c12295a 1660 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1da177e4
LT
1661 /* pimsm uses asserts, when switching from RPT to SPT,
1662 so that we cannot check that packet arrived on an oif.
1663 It is bad, but otherwise we would need to move pretty
1664 large chunk of pimd to kernel. Ough... --ANK
1665 */
0c12295a 1666 (mrt->mroute_do_pim ||
6f9374a9 1667 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 1668 time_after(jiffies,
1da177e4
LT
1669 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1670 cache->mfc_un.res.last_assert = jiffies;
0c12295a 1671 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
1672 }
1673 goto dont_forward;
1674 }
1675
0c12295a
PM
1676 mrt->vif_table[vif].pkt_in++;
1677 mrt->vif_table[vif].bytes_in += skb->len;
1da177e4
LT
1678
1679 /*
1680 * Forward the frame
1681 */
1682 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
eddc9ec5 1683 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
1684 if (psend != -1) {
1685 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1686 if (skb2)
0c12295a
PM
1687 ipmr_queue_xmit(net, mrt, skb2, cache,
1688 psend);
1da177e4 1689 }
c354e124 1690 psend = ct;
1da177e4
LT
1691 }
1692 }
1693 if (psend != -1) {
1694 if (local) {
1695 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1696 if (skb2)
0c12295a 1697 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1da177e4 1698 } else {
0c12295a 1699 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1da177e4
LT
1700 return 0;
1701 }
1702 }
1703
1704dont_forward:
1705 if (!local)
1706 kfree_skb(skb);
1707 return 0;
1708}
1709
1710
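/* Editorial note (not in the original source): the psend dance above defers
 * transmission by one qualifying vif - every oif except the last gets a
 * clone, and the final transmission consumes the original skb (unless
 * "local" asked us to preserve it), saving one copy per forwarded packet.
 */
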
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0)
		return err;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option in IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

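/* Editorial note (not in the original source): the checksum condition in
 * pim_rcv() implements the relaxation mentioned in the file header - a
 * register is accepted when the PIM header alone checksums correctly
 * (what the PIMv2 spec requires) or, failing that, when the checksum
 * covers the whole packet (what some older peers compute).
 */
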
cb6a4e46
PM
1908static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1909 struct mfc_cache *c, struct rtmsg *rtm)
1da177e4
LT
1910{
1911 int ct;
1912 struct rtnexthop *nhp;
27a884dc 1913 u8 *b = skb_tail_pointer(skb);
1da177e4
LT
1914 struct rtattr *mp_head;
1915
7438189b
ND
1916 /* If cache is unresolved, don't try to parse IIF and OIF */
1917 if (c->mfc_parent > MAXVIFS)
1918 return -ENOENT;
1919
0c12295a
PM
1920 if (VIF_EXISTS(mrt, c->mfc_parent))
1921 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1da177e4 1922
c354e124 1923 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1da177e4
LT
1924
1925 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
0c12295a 1926 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1da177e4
LT
1927 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1928 goto rtattr_failure;
c354e124 1929 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1da177e4
LT
1930 nhp->rtnh_flags = 0;
1931 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
0c12295a 1932 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1da177e4
LT
1933 nhp->rtnh_len = sizeof(*nhp);
1934 }
1935 }
1936 mp_head->rta_type = RTA_MULTIPATH;
27a884dc 1937 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1da177e4
LT
1938 rtm->rtm_type = RTN_MULTICAST;
1939 return 1;
1940
1941rtattr_failure:
dc5fc579 1942 nlmsg_trim(skb, b);
1da177e4
LT
1943 return -EMSGSIZE;
1944}
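
/*
 * Sketch (hypothetical userspace snippet, not part of this file):
 * walking the RTA_MULTIPATH attribute built above, where 'mp_attr' is
 * the received attribute and each rtnexthop encodes one output vif and
 * its TTL threshold:
 *
 *	struct rtnexthop *nh = RTA_DATA(mp_attr);
 *	int len = RTA_PAYLOAD(mp_attr);
 *
 *	while (RTNH_OK(nh, len)) {
 *		printf("oif %d ttl %d\n", nh->rtnh_ifindex, nh->rtnh_hops);
 *		len -= NLMSG_ALIGN(nh->rtnh_len);
 *		nh = RTNH_NEXT(nh);
 *	}
 */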

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		/* version 0 marks skb2 as a locally generated resolve
		 * request: once the entry resolves, ipmr_cache_resolve()
		 * answers it via netlink instead of forwarding it. */
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
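
/*
 * ipmr_get_route() is called from the unicast rtnetlink path
 * (rt_fill_info() in net/ipv4/route.c) when a queried route turns out
 * to be multicast.  It returns 1 once the route data has been appended,
 * -EAGAIN when 'nowait' forbids queueing an unresolved request, or a
 * negative errno on failure.
 */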

static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len = 32;
	rtm->rtm_src_len = 32;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
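
/*
 * Sketch (hypothetical userspace snippet, not part of this file):
 * requesting the dump served by ipmr_fill_mroute()/ipmr_rtm_dumproute()
 * below, via an RTM_GETROUTE dump with rtm_family = RTNL_FAMILY_IPMR;
 * 'nl_sock' is assumed to be an AF_NETLINK/NETLINK_ROUTE socket:
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct rtmsg	rtm;
 *	} req = {
 *		.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct rtmsg)),
 *		.nlh.nlmsg_type	 = RTM_GETROUTE,
 *		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *		.rtm.rtm_family	 = RTNL_FAMILY_IPMR,
 *	};
 *	send(nl_sock, &req, sizeof(req), 0);
 */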

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
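
/*
 * The dump above is restartable: the (table, hash chain, entry)
 * position is saved in cb->args[0..2], so when the skb fills up,
 * netlink calls back and the walk resumes where it stopped instead of
 * starting over.
 */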

#ifdef CONFIG_PROC_FS
/*
 * The /proc interfaces to multicast routing:
 * /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
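
/*
 * Locking note: mrt_lock is taken in ->start and only dropped in
 * ->stop, so the vif table cannot change while a read of
 * /proc/net/ip_mr_vif is in progress.
 */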

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
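
/*
 * Example of the resulting /proc/net/ip_mr_vif output (illustrative
 * values only):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0            1500      10      3000      20 00000 0100000A 00000000
 */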

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}
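
/*
 * Locking note: the walk holds mrt_lock while traversing the resolved
 * hash chains, then hands over to mfc_unres_lock for the unresolved
 * queue; ->stop releases whichever lock is still held, judging by
 * it->cache.
 */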

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif
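
/*
 * Example of the resulting /proc/net/ip_mr_cache output (illustrative
 * values only); resolved entries list their Oifs as vif:TTL-threshold
 * pairs:
 *
 *	Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *	E10000EF 0100000A 0          5      640        0  1:1  2:1
 */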

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler = pim_rcv,
	.netns_ok = 1,
};
#endif


/*
 * Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
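
/*
 * ip_mr_init() is called once at boot from inet_init() in
 * net/ipv4/af_inet.c when CONFIG_IP_MROUTE is enabled; multicast
 * routing support is always built in, so there is no module exit path.
 */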