bbs.cooldavid.org Git - net-next-2.6.git / blame - net/ipv4/ipmr.c
ipmr: RCU protection for mfc_cache_array
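The change blamed here moves lookups in mfc_cache_array[] off the mrt_lock read side and onto RCU: ipmr_cache_find() walks each hash chain with list_for_each_entry_rcu() inside an rcu_read_lock() section, writers update the chains with list_add_rcu()/list_del_rcu() under RTNL, and freeing is deferred through call_rcu() in ipmr_cache_free(), so a concurrent reader can never touch freed memory. A minimal sketch of the pattern, using names from the code below (handle(), saddr and daddr are placeholders, not part of the file):

    rcu_read_lock();                        /* data path: no exclusive locks */
    c = ipmr_cache_find(mrt, saddr, daddr); /* list_for_each_entry_rcu() inside */
    if (c)
            handle(c);                      /* c stays valid until the unlock */
    rcu_read_unlock();

    /* Update side (process context, under rtnl_lock): */
    list_del_rcu(&c->list);                 /* unlink; readers may still see c */
    ipmr_cache_free(c);                     /* call_rcu() frees after a grace period */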
1da177e4
LT
1/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
113aa838 4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
1da177e4
LT
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
f77f13e2 25 * Relax this requirement to work with older peers.
1da177e4
LT
26 *
27 */
28
1da177e4
LT
29#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
4fc268d2 32#include <linux/capability.h>
1da177e4
LT
33#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
46f25dff 49#include <linux/if_ether.h>
5a0e3ad6 50#include <linux/slab.h>
457c4cbc 51#include <net/net_namespace.h>
1da177e4
LT
52#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
14c85021 55#include <net/route.h>
1da177e4
LT
56#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
dc5fc579 65#include <net/netlink.h>
f0ad0860 66#include <net/fib_rules.h>
1da177e4
LT
67
68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69#define CONFIG_IP_PIMSM 1
70#endif
71
0c12295a 72struct mr_table {
f0ad0860 73 struct list_head list;
8de53dfb
PM
74#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
f0ad0860 77 u32 id;
4c968709 78 struct sock __rcu *mroute_sk;
0c12295a
PM
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
f0ad0860
PM
92struct ipmr_rule {
93 struct fib_rule common;
94};
95
96struct ipmr_result {
97 struct mr_table *mrt;
98};
99
1da177e4
LT
100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that changes are serialized via rtnl_lock.
102 */
103
104static DEFINE_RWLOCK(mrt_lock);
105
106/*
107 * Multicast router control variables
108 */
109
0c12295a 110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
1da177e4
LT
111
112/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock);
114
115/* We return to Alan's original scheme. The hash table of resolved
116 entries is changed only in process context and protected
117 with the weak lock mrt_lock. The queue of unresolved entries is
118 protected with the strong spinlock mfc_unres_lock.
119
120 This way the data path needs no exclusive locks at all.
121 */
122
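Concretely, the fast path only ever takes the shared side of mrt_lock (or, for the resolved cache after this patch, an RCU read-side section), while configuration changes take the exclusive side with bottom halves disabled. A sketch of the two roles (illustration only, not part of the file):

    /* Data path (may run in softirq context): */
    read_lock(&mrt_lock);
    /* ... look up vif_table[], send upcalls ... */
    read_unlock(&mrt_lock);

    /* Control path (process context, already under rtnl_lock): */
    write_lock_bh(&mrt_lock);
    /* ... install or tear down a vif, update maxvif ... */
    write_unlock_bh(&mrt_lock);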
e18b890b 123static struct kmem_cache *mrt_cachep __read_mostly;
1da177e4 124
f0ad0860 125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
0c12295a
PM
126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
4feb88e5 130 struct sk_buff *pkt, vifi_t vifi, int assert);
cb6a4e46
PM
131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
f0ad0860
PM
133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
1da177e4 169
f0ad0860
PM
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
189static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190{
191 return 1;
192}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
200{
201 return 0;
202}
203
204static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206{
207 return 1;
208}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
3d0c9c4e 219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
25239cee 220 .family = RTNL_FAMILY_IPMR,
f0ad0860
PM
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
035320d5
ED
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271 list_del(&mrt->list);
f0ad0860 272 kfree(mrt);
035320d5 273 }
f0ad0860
PM
274 fib_rules_unregister(net->ipv4.mr_rules_ops);
275}
276#else
277#define ipmr_for_each_table(mrt, net) \
278 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
279
280static struct mr_table *ipmr_get_table(struct net *net, u32 id)
281{
282 return net->ipv4.mrt;
283}
284
285static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
286 struct mr_table **mrt)
287{
288 *mrt = net->ipv4.mrt;
289 return 0;
290}
291
292static int __net_init ipmr_rules_init(struct net *net)
293{
294 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295 return net->ipv4.mrt ? 0 : -ENOMEM;
296}
297
298static void __net_exit ipmr_rules_exit(struct net *net)
299{
300 kfree(net->ipv4.mrt);
301}
302#endif
303
304static struct mr_table *ipmr_new_table(struct net *net, u32 id)
305{
306 struct mr_table *mrt;
307 unsigned int i;
1da177e4 308
f0ad0860
PM
309 mrt = ipmr_get_table(net, id);
310 if (mrt != NULL)
311 return mrt;
312
313 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 if (mrt == NULL)
315 return NULL;
8de53dfb 316 write_pnet(&mrt->net, net);
f0ad0860
PM
317 mrt->id = id;
318
319 /* Forwarding cache */
320 for (i = 0; i < MFC_LINES; i++)
321 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
322
323 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
324
325 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 (unsigned long)mrt);
327
328#ifdef CONFIG_IP_PIMSM
329 mrt->mroute_reg_vif_num = -1;
330#endif
331#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
332 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
333#endif
334 return mrt;
335}
1da177e4
LT
336
337/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
338
d607032d
WC
339static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
340{
4feb88e5
BT
341 struct net *net = dev_net(dev);
342
d607032d
WC
343 dev_close(dev);
344
4feb88e5 345 dev = __dev_get_by_name(net, "tunl0");
d607032d 346 if (dev) {
5bc3eb7e 347 const struct net_device_ops *ops = dev->netdev_ops;
d607032d 348 struct ifreq ifr;
d607032d
WC
349 struct ip_tunnel_parm p;
350
351 memset(&p, 0, sizeof(p));
352 p.iph.daddr = v->vifc_rmt_addr.s_addr;
353 p.iph.saddr = v->vifc_lcl_addr.s_addr;
354 p.iph.version = 4;
355 p.iph.ihl = 5;
356 p.iph.protocol = IPPROTO_IPIP;
357 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
358 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
359
5bc3eb7e
SH
360 if (ops->ndo_do_ioctl) {
361 mm_segment_t oldfs = get_fs();
362
363 set_fs(KERNEL_DS);
364 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
365 set_fs(oldfs);
366 }
d607032d
WC
367 }
368}
369
1da177e4 370static
4feb88e5 371struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
1da177e4
LT
372{
373 struct net_device *dev;
374
4feb88e5 375 dev = __dev_get_by_name(net, "tunl0");
1da177e4
LT
376
377 if (dev) {
5bc3eb7e 378 const struct net_device_ops *ops = dev->netdev_ops;
1da177e4
LT
379 int err;
380 struct ifreq ifr;
1da177e4
LT
381 struct ip_tunnel_parm p;
382 struct in_device *in_dev;
383
384 memset(&p, 0, sizeof(p));
385 p.iph.daddr = v->vifc_rmt_addr.s_addr;
386 p.iph.saddr = v->vifc_lcl_addr.s_addr;
387 p.iph.version = 4;
388 p.iph.ihl = 5;
389 p.iph.protocol = IPPROTO_IPIP;
390 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
ba93ef74 391 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
1da177e4 392
5bc3eb7e
SH
393 if (ops->ndo_do_ioctl) {
394 mm_segment_t oldfs = get_fs();
395
396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs);
399 } else
400 err = -EOPNOTSUPP;
1da177e4
LT
401
402 dev = NULL;
403
4feb88e5
BT
404 if (err == 0 &&
405 (dev = __dev_get_by_name(net, p.name)) != NULL) {
1da177e4
LT
406 dev->flags |= IFF_MULTICAST;
407
e5ed6399 408 in_dev = __in_dev_get_rtnl(dev);
71e27da9 409 if (in_dev == NULL)
1da177e4 410 goto failure;
71e27da9
HX
411
412 ipv4_devconf_setall(in_dev);
413 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
1da177e4
LT
414
415 if (dev_open(dev))
416 goto failure;
7dc00c82 417 dev_hold(dev);
1da177e4
LT
418 }
419 }
420 return dev;
421
422failure:
423 /* allow the register to be completed before unregistering. */
424 rtnl_unlock();
425 rtnl_lock();
426
427 unregister_netdevice(dev);
428 return NULL;
429}
430
431#ifdef CONFIG_IP_PIMSM
432
6fef4c0c 433static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 434{
4feb88e5 435 struct net *net = dev_net(dev);
f0ad0860
PM
436 struct mr_table *mrt;
437 struct flowi fl = {
438 .oif = dev->ifindex,
439 .iif = skb->skb_iif,
440 .mark = skb->mark,
441 };
442 int err;
443
444 err = ipmr_fib_lookup(net, &fl, &mrt);
e40dbc51
BG
445 if (err < 0) {
446 kfree_skb(skb);
f0ad0860 447 return err;
e40dbc51 448 }
4feb88e5 449
1da177e4 450 read_lock(&mrt_lock);
cf3677ae
PE
451 dev->stats.tx_bytes += skb->len;
452 dev->stats.tx_packets++;
0c12295a 453 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
1da177e4
LT
454 read_unlock(&mrt_lock);
455 kfree_skb(skb);
6ed10654 456 return NETDEV_TX_OK;
1da177e4
LT
457}
458
007c3838
SH
459static const struct net_device_ops reg_vif_netdev_ops = {
460 .ndo_start_xmit = reg_vif_xmit,
461};
462
1da177e4
LT
463static void reg_vif_setup(struct net_device *dev)
464{
465 dev->type = ARPHRD_PIMREG;
46f25dff 466 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
1da177e4 467 dev->flags = IFF_NOARP;
007c3838 468 dev->netdev_ops = &reg_vif_netdev_ops;
1da177e4 469 dev->destructor = free_netdev;
403dbb97 470 dev->features |= NETIF_F_NETNS_LOCAL;
1da177e4
LT
471}
472
f0ad0860 473static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
1da177e4
LT
474{
475 struct net_device *dev;
476 struct in_device *in_dev;
f0ad0860 477 char name[IFNAMSIZ];
1da177e4 478
f0ad0860
PM
479 if (mrt->id == RT_TABLE_DEFAULT)
480 sprintf(name, "pimreg");
481 else
482 sprintf(name, "pimreg%u", mrt->id);
1da177e4 483
f0ad0860 484 dev = alloc_netdev(0, name, reg_vif_setup);
1da177e4
LT
485
486 if (dev == NULL)
487 return NULL;
488
403dbb97
TG
489 dev_net_set(dev, net);
490
1da177e4
LT
491 if (register_netdevice(dev)) {
492 free_netdev(dev);
493 return NULL;
494 }
495 dev->iflink = 0;
496
71e27da9
HX
497 rcu_read_lock();
498 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
499 rcu_read_unlock();
1da177e4 500 goto failure;
71e27da9 501 }
1da177e4 502
71e27da9
HX
503 ipv4_devconf_setall(in_dev);
504 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
505 rcu_read_unlock();
1da177e4
LT
506
507 if (dev_open(dev))
508 goto failure;
509
7dc00c82
WC
510 dev_hold(dev);
511
1da177e4
LT
512 return dev;
513
514failure:
515 /* allow the register to be completed before unregistering. */
516 rtnl_unlock();
517 rtnl_lock();
518
519 unregister_netdevice(dev);
520 return NULL;
521}
522#endif
523
524/*
525 * Delete a VIF entry
7dc00c82 526 * @notify: Set to 1 if the caller is a notifier_call
1da177e4 527 */
e905a9ed 528
0c12295a 529static int vif_delete(struct mr_table *mrt, int vifi, int notify,
d17fa6fa 530 struct list_head *head)
1da177e4
LT
531{
532 struct vif_device *v;
533 struct net_device *dev;
534 struct in_device *in_dev;
535
0c12295a 536 if (vifi < 0 || vifi >= mrt->maxvif)
1da177e4
LT
537 return -EADDRNOTAVAIL;
538
0c12295a 539 v = &mrt->vif_table[vifi];
1da177e4
LT
540
541 write_lock_bh(&mrt_lock);
542 dev = v->dev;
543 v->dev = NULL;
544
545 if (!dev) {
546 write_unlock_bh(&mrt_lock);
547 return -EADDRNOTAVAIL;
548 }
549
550#ifdef CONFIG_IP_PIMSM
0c12295a
PM
551 if (vifi == mrt->mroute_reg_vif_num)
552 mrt->mroute_reg_vif_num = -1;
1da177e4
LT
553#endif
554
0c12295a 555 if (vifi+1 == mrt->maxvif) {
1da177e4
LT
556 int tmp;
557 for (tmp=vifi-1; tmp>=0; tmp--) {
0c12295a 558 if (VIF_EXISTS(mrt, tmp))
1da177e4
LT
559 break;
560 }
0c12295a 561 mrt->maxvif = tmp+1;
1da177e4
LT
562 }
563
564 write_unlock_bh(&mrt_lock);
565
566 dev_set_allmulti(dev, -1);
567
e5ed6399 568 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
42f811b8 569 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
1da177e4
LT
570 ip_rt_multicast_event(in_dev);
571 }
572
7dc00c82 573 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
d17fa6fa 574 unregister_netdevice_queue(dev, head);
1da177e4
LT
575
576 dev_put(dev);
577 return 0;
578}
579
a8c9486b 580static void ipmr_cache_free_rcu(struct rcu_head *head)
5c0a66f5 581{
a8c9486b
ED
582 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
583
5c0a66f5
BT
584 kmem_cache_free(mrt_cachep, c);
585}
586
a8c9486b
ED
587static inline void ipmr_cache_free(struct mfc_cache *c)
588{
589 call_rcu(&c->rcu, ipmr_cache_free_rcu);
590}
591
1da177e4
LT
592/* Destroy an unresolved cache entry, killing queued skbs
593 and reporting error to netlink readers.
594 */
595
0c12295a 596static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
1da177e4 597{
8de53dfb 598 struct net *net = read_pnet(&mrt->net);
1da177e4 599 struct sk_buff *skb;
9ef1d4c7 600 struct nlmsgerr *e;
1da177e4 601
0c12295a 602 atomic_dec(&mrt->cache_resolve_queue_len);
1da177e4 603
c354e124 604 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
eddc9ec5 605 if (ip_hdr(skb)->version == 0) {
1da177e4
LT
606 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
607 nlh->nlmsg_type = NLMSG_ERROR;
608 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
609 skb_trim(skb, nlh->nlmsg_len);
9ef1d4c7
PM
610 e = NLMSG_DATA(nlh);
611 e->error = -ETIMEDOUT;
612 memset(&e->msg, 0, sizeof(e->msg));
2942e900 613
4feb88e5 614 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1da177e4
LT
615 } else
616 kfree_skb(skb);
617 }
618
5c0a66f5 619 ipmr_cache_free(c);
1da177e4
LT
620}
621
622
e258beb2 623/* Timer process for the unresolved queue. */
1da177e4 624
e258beb2 625static void ipmr_expire_process(unsigned long arg)
1da177e4 626{
0c12295a 627 struct mr_table *mrt = (struct mr_table *)arg;
1da177e4
LT
628 unsigned long now;
629 unsigned long expires;
862465f2 630 struct mfc_cache *c, *next;
1da177e4
LT
631
632 if (!spin_trylock(&mfc_unres_lock)) {
0c12295a 633 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
1da177e4
LT
634 return;
635 }
636
0c12295a 637 if (list_empty(&mrt->mfc_unres_queue))
1da177e4
LT
638 goto out;
639
640 now = jiffies;
641 expires = 10*HZ;
1da177e4 642
0c12295a 643 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1da177e4
LT
644 if (time_after(c->mfc_un.unres.expires, now)) {
645 unsigned long interval = c->mfc_un.unres.expires - now;
646 if (interval < expires)
647 expires = interval;
1da177e4
LT
648 continue;
649 }
650
862465f2 651 list_del(&c->list);
0c12295a 652 ipmr_destroy_unres(mrt, c);
1da177e4
LT
653 }
654
0c12295a
PM
655 if (!list_empty(&mrt->mfc_unres_queue))
656 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
1da177e4
LT
657
658out:
659 spin_unlock(&mfc_unres_lock);
660}
661
662/* Fill oifs list. It is called under write locked mrt_lock. */
663
0c12295a 664static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
d658f8a0 665 unsigned char *ttls)
1da177e4
LT
666{
667 int vifi;
668
669 cache->mfc_un.res.minvif = MAXVIFS;
670 cache->mfc_un.res.maxvif = 0;
671 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
672
0c12295a
PM
673 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
674 if (VIF_EXISTS(mrt, vifi) &&
cf958ae3 675 ttls[vifi] && ttls[vifi] < 255) {
1da177e4
LT
676 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
677 if (cache->mfc_un.res.minvif > vifi)
678 cache->mfc_un.res.minvif = vifi;
679 if (cache->mfc_un.res.maxvif <= vifi)
680 cache->mfc_un.res.maxvif = vifi + 1;
681 }
682 }
683}
684
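A worked example of the threshold logic above, assuming vifs 0 and 2 exist and vif 1 does not: ttls[] = {1, 64, 64} yields res.ttls = {1, 255, 64}, minvif = 0 and maxvif = 3. ip_mr_forward() later transmits on vif ct only when ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct], so the 255 left at the missing vif can never match.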
0c12295a
PM
685static int vif_add(struct net *net, struct mr_table *mrt,
686 struct vifctl *vifc, int mrtsock)
1da177e4
LT
687{
688 int vifi = vifc->vifc_vifi;
0c12295a 689 struct vif_device *v = &mrt->vif_table[vifi];
1da177e4
LT
690 struct net_device *dev;
691 struct in_device *in_dev;
d607032d 692 int err;
1da177e4
LT
693
694 /* Is vif busy ? */
0c12295a 695 if (VIF_EXISTS(mrt, vifi))
1da177e4
LT
696 return -EADDRINUSE;
697
698 switch (vifc->vifc_flags) {
699#ifdef CONFIG_IP_PIMSM
700 case VIFF_REGISTER:
701 /*
702 * Special Purpose VIF in PIM
703 * All the packets will be sent to the daemon
704 */
0c12295a 705 if (mrt->mroute_reg_vif_num >= 0)
1da177e4 706 return -EADDRINUSE;
f0ad0860 707 dev = ipmr_reg_vif(net, mrt);
1da177e4
LT
708 if (!dev)
709 return -ENOBUFS;
d607032d
WC
710 err = dev_set_allmulti(dev, 1);
711 if (err) {
712 unregister_netdevice(dev);
7dc00c82 713 dev_put(dev);
d607032d
WC
714 return err;
715 }
1da177e4
LT
716 break;
717#endif
e905a9ed 718 case VIFF_TUNNEL:
4feb88e5 719 dev = ipmr_new_tunnel(net, vifc);
1da177e4
LT
720 if (!dev)
721 return -ENOBUFS;
d607032d
WC
722 err = dev_set_allmulti(dev, 1);
723 if (err) {
724 ipmr_del_tunnel(dev, vifc);
7dc00c82 725 dev_put(dev);
d607032d
WC
726 return err;
727 }
1da177e4 728 break;
ee5e81f0
I
729
730 case VIFF_USE_IFINDEX:
1da177e4 731 case 0:
ee5e81f0
I
732 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
733 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
95ae6b22 734 if (dev && __in_dev_get_rtnl(dev) == NULL) {
ee5e81f0
I
735 dev_put(dev);
736 return -EADDRNOTAVAIL;
737 }
738 } else
739 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
740
1da177e4
LT
741 if (!dev)
742 return -EADDRNOTAVAIL;
d607032d 743 err = dev_set_allmulti(dev, 1);
7dc00c82
WC
744 if (err) {
745 dev_put(dev);
d607032d 746 return err;
7dc00c82 747 }
1da177e4
LT
748 break;
749 default:
750 return -EINVAL;
751 }
752
d0490cfd
DC
753 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
754 dev_put(dev);
1da177e4 755 return -EADDRNOTAVAIL;
d0490cfd 756 }
42f811b8 757 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
1da177e4
LT
758 ip_rt_multicast_event(in_dev);
759
760 /*
761 * Fill in the VIF structures
762 */
c354e124
JK
763 v->rate_limit = vifc->vifc_rate_limit;
764 v->local = vifc->vifc_lcl_addr.s_addr;
765 v->remote = vifc->vifc_rmt_addr.s_addr;
766 v->flags = vifc->vifc_flags;
1da177e4
LT
767 if (!mrtsock)
768 v->flags |= VIFF_STATIC;
c354e124 769 v->threshold = vifc->vifc_threshold;
1da177e4
LT
770 v->bytes_in = 0;
771 v->bytes_out = 0;
772 v->pkt_in = 0;
773 v->pkt_out = 0;
774 v->link = dev->ifindex;
775 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
776 v->link = dev->iflink;
777
778 /* And finish update writing critical data */
779 write_lock_bh(&mrt_lock);
c354e124 780 v->dev = dev;
1da177e4
LT
781#ifdef CONFIG_IP_PIMSM
782 if (v->flags&VIFF_REGISTER)
0c12295a 783 mrt->mroute_reg_vif_num = vifi;
1da177e4 784#endif
0c12295a
PM
785 if (vifi+1 > mrt->maxvif)
786 mrt->maxvif = vifi+1;
1da177e4
LT
787 write_unlock_bh(&mrt_lock);
788 return 0;
789}
790
a8c9486b 791/* called with rcu_read_lock() */
0c12295a 792static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
4feb88e5
BT
793 __be32 origin,
794 __be32 mcastgrp)
1da177e4 795{
c354e124 796 int line = MFC_HASH(mcastgrp, origin);
1da177e4
LT
797 struct mfc_cache *c;
798
a8c9486b 799 list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
862465f2
PM
800 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
801 return c;
1da177e4 802 }
862465f2 803 return NULL;
1da177e4
LT
804}
805
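Callers must copy whatever they need out of the entry before leaving the read-side critical section, since the entry may be freed one grace period after a concurrent delete. A sketch mirroring the SIOCGETSGCNT handler further down (src, grp, pkt and bytes are placeholder variables):

    rcu_read_lock();
    c = ipmr_cache_find(mrt, src, grp);
    if (c) {
            pkt   = c->mfc_un.res.pkt;      /* snapshot the counters ...      */
            bytes = c->mfc_un.res.bytes;    /* ... while c is guaranteed live */
    }
    rcu_read_unlock();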
806/*
807 * Allocate a multicast cache entry
808 */
d658f8a0 809static struct mfc_cache *ipmr_cache_alloc(void)
1da177e4 810{
c354e124 811 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
a8c9486b
ED
812
813 if (c)
814 c->mfc_un.res.minvif = MAXVIFS;
1da177e4
LT
815 return c;
816}
817
d658f8a0 818static struct mfc_cache *ipmr_cache_alloc_unres(void)
1da177e4 819{
c354e124 820 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
a8c9486b
ED
821
822 if (c) {
823 skb_queue_head_init(&c->mfc_un.unres.unresolved);
824 c->mfc_un.unres.expires = jiffies + 10*HZ;
825 }
1da177e4
LT
826 return c;
827}
828
829/*
830 * A cache entry has gone from the unresolved queue into a resolved state
831 */
e905a9ed 832
0c12295a
PM
833static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
834 struct mfc_cache *uc, struct mfc_cache *c)
1da177e4
LT
835{
836 struct sk_buff *skb;
9ef1d4c7 837 struct nlmsgerr *e;
1da177e4
LT
838
839 /*
840 * Play the pending entries through our router
841 */
842
c354e124 843 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
eddc9ec5 844 if (ip_hdr(skb)->version == 0) {
1da177e4
LT
845 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
846
cb6a4e46 847 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
27a884dc
ACM
848 nlh->nlmsg_len = (skb_tail_pointer(skb) -
849 (u8 *)nlh);
1da177e4
LT
850 } else {
851 nlh->nlmsg_type = NLMSG_ERROR;
852 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
853 skb_trim(skb, nlh->nlmsg_len);
9ef1d4c7
PM
854 e = NLMSG_DATA(nlh);
855 e->error = -EMSGSIZE;
856 memset(&e->msg, 0, sizeof(e->msg));
1da177e4 857 }
2942e900 858
d658f8a0 859 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1da177e4 860 } else
0c12295a 861 ip_mr_forward(net, mrt, skb, c, 0);
1da177e4
LT
862 }
863}
864
865/*
866 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
867 * expects the following bizarre scheme.
868 *
869 * Called under mrt_lock.
870 */
e905a9ed 871
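On the userspace side, mrouted receives these reports as a struct igmpmsg on the same raw IGMP socket it called MRT_INIT on; im_mbz == 0 marks the buffer as an upcall rather than a genuine IGMP packet, and im_src/im_dst carry the (S,G) pair. A rough sketch of the daemon's read loop (mroute_fd and the buffer size are assumptions, error handling omitted):

    char buf[8192];
    ssize_t n = read(mroute_fd, buf, sizeof(buf));
    struct igmpmsg *im = (struct igmpmsg *)buf;

    if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0) {
            switch (im->im_msgtype) {
            case IGMPMSG_NOCACHE:  /* no (S,G) route yet: resolve, MRT_ADD_MFC */
            case IGMPMSG_WRONGVIF: /* wrong iif: PIM assert candidate */
            case IGMPMSG_WHOLEPKT: /* PIM register: whole packet follows */
                    break;
            }
    }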
0c12295a 872static int ipmr_cache_report(struct mr_table *mrt,
4feb88e5 873 struct sk_buff *pkt, vifi_t vifi, int assert)
1da177e4
LT
874{
875 struct sk_buff *skb;
c9bdd4b5 876 const int ihl = ip_hdrlen(pkt);
1da177e4
LT
877 struct igmphdr *igmp;
878 struct igmpmsg *msg;
4c968709 879 struct sock *mroute_sk;
1da177e4
LT
880 int ret;
881
882#ifdef CONFIG_IP_PIMSM
883 if (assert == IGMPMSG_WHOLEPKT)
884 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
885 else
886#endif
887 skb = alloc_skb(128, GFP_ATOMIC);
888
132adf54 889 if (!skb)
1da177e4
LT
890 return -ENOBUFS;
891
892#ifdef CONFIG_IP_PIMSM
893 if (assert == IGMPMSG_WHOLEPKT) {
894 /* Ugly, but we have no choice with this interface.
895 Duplicate old header, fix ihl, length etc.
896 And all this only to mangle msg->im_msgtype and
897 to set msg->im_mbz to "mbz" :-)
898 */
878c8145
ACM
899 skb_push(skb, sizeof(struct iphdr));
900 skb_reset_network_header(skb);
badff6d0 901 skb_reset_transport_header(skb);
0272ffc4 902 msg = (struct igmpmsg *)skb_network_header(skb);
d56f90a7 903 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
1da177e4
LT
904 msg->im_msgtype = IGMPMSG_WHOLEPKT;
905 msg->im_mbz = 0;
0c12295a 906 msg->im_vif = mrt->mroute_reg_vif_num;
eddc9ec5
ACM
907 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
908 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
909 sizeof(struct iphdr));
e905a9ed 910 } else
1da177e4 911#endif
e905a9ed
YH
912 {
913
1da177e4
LT
914 /*
915 * Copy the IP header
916 */
917
27a884dc 918 skb->network_header = skb->tail;
ddc7b8e3 919 skb_put(skb, ihl);
27d7ff46 920 skb_copy_to_linear_data(skb, pkt->data, ihl);
eddc9ec5
ACM
921 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
922 msg = (struct igmpmsg *)skb_network_header(skb);
1da177e4 923 msg->im_vif = vifi;
adf30907 924 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1da177e4
LT
925
926 /*
927 * Add our header
928 */
929
c354e124 930 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
1da177e4
LT
931 igmp->type =
932 msg->im_msgtype = assert;
933 igmp->code = 0;
eddc9ec5 934 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
b0e380b1 935 skb->transport_header = skb->network_header;
e905a9ed 936 }
1da177e4 937
4c968709
ED
938 rcu_read_lock();
939 mroute_sk = rcu_dereference(mrt->mroute_sk);
940 if (mroute_sk == NULL) {
941 rcu_read_unlock();
1da177e4
LT
942 kfree_skb(skb);
943 return -EINVAL;
944 }
945
946 /*
947 * Deliver to mrouted
948 */
4c968709
ED
949 ret = sock_queue_rcv_skb(mroute_sk, skb);
950 rcu_read_unlock();
70a269e6 951 if (ret < 0) {
1da177e4
LT
952 if (net_ratelimit())
953 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
954 kfree_skb(skb);
955 }
956
957 return ret;
958}
959
960/*
961 * Queue a packet for resolution. The packet is attached to a locked cache entry.
962 */
e905a9ed 963
1da177e4 964static int
0c12295a 965ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1da177e4 966{
862465f2 967 bool found = false;
1da177e4
LT
968 int err;
969 struct mfc_cache *c;
eddc9ec5 970 const struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
971
972 spin_lock_bh(&mfc_unres_lock);
0c12295a 973 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
e258beb2 974 if (c->mfc_mcastgrp == iph->daddr &&
862465f2
PM
975 c->mfc_origin == iph->saddr) {
976 found = true;
1da177e4 977 break;
862465f2 978 }
1da177e4
LT
979 }
980
862465f2 981 if (!found) {
1da177e4
LT
982 /*
983 * Create a new entry if allowable
984 */
985
0c12295a 986 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
d658f8a0 987 (c = ipmr_cache_alloc_unres()) == NULL) {
1da177e4
LT
988 spin_unlock_bh(&mfc_unres_lock);
989
990 kfree_skb(skb);
991 return -ENOBUFS;
992 }
993
994 /*
995 * Fill in the new cache entry
996 */
eddc9ec5
ACM
997 c->mfc_parent = -1;
998 c->mfc_origin = iph->saddr;
999 c->mfc_mcastgrp = iph->daddr;
1da177e4
LT
1000
1001 /*
1002 * Reflect first query at mrouted.
1003 */
0c12295a 1004 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
4feb88e5 1005 if (err < 0) {
e905a9ed 1006 /* If the report failed throw the cache entry
1da177e4
LT
1007 out - Brad Parker
1008 */
1009 spin_unlock_bh(&mfc_unres_lock);
1010
5c0a66f5 1011 ipmr_cache_free(c);
1da177e4
LT
1012 kfree_skb(skb);
1013 return err;
1014 }
1015
0c12295a
PM
1016 atomic_inc(&mrt->cache_resolve_queue_len);
1017 list_add(&c->list, &mrt->mfc_unres_queue);
1da177e4 1018
278554bd
DM
1019 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1020 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1da177e4
LT
1021 }
1022
1023 /*
1024 * See if we can append the packet
1025 */
1026 if (c->mfc_un.unres.unresolved.qlen>3) {
1027 kfree_skb(skb);
1028 err = -ENOBUFS;
1029 } else {
c354e124 1030 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1da177e4
LT
1031 err = 0;
1032 }
1033
1034 spin_unlock_bh(&mfc_unres_lock);
1035 return err;
1036}
1037
1038/*
1039 * MFC cache manipulation by user space mroute daemon
1040 */
1041
0c12295a 1042static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1da177e4
LT
1043{
1044 int line;
862465f2 1045 struct mfc_cache *c, *next;
1da177e4 1046
c354e124 1047 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 1048
0c12295a 1049 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1da177e4
LT
1050 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1051 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
a8c9486b 1052 list_del_rcu(&c->list);
1da177e4 1053
5c0a66f5 1054 ipmr_cache_free(c);
1da177e4
LT
1055 return 0;
1056 }
1057 }
1058 return -ENOENT;
1059}
1060
0c12295a
PM
1061static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1062 struct mfcctl *mfc, int mrtsock)
1da177e4 1063{
862465f2 1064 bool found = false;
1da177e4 1065 int line;
862465f2 1066 struct mfc_cache *uc, *c;
1da177e4 1067
a50436f2
PM
1068 if (mfc->mfcc_parent >= MAXVIFS)
1069 return -ENFILE;
1070
c354e124 1071 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 1072
0c12295a 1073 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1da177e4 1074 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
862465f2
PM
1075 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1076 found = true;
1da177e4 1077 break;
862465f2 1078 }
1da177e4
LT
1079 }
1080
862465f2 1081 if (found) {
1da177e4
LT
1082 write_lock_bh(&mrt_lock);
1083 c->mfc_parent = mfc->mfcc_parent;
0c12295a 1084 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1da177e4
LT
1085 if (!mrtsock)
1086 c->mfc_flags |= MFC_STATIC;
1087 write_unlock_bh(&mrt_lock);
1088 return 0;
1089 }
1090
f97c1e0c 1091 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1da177e4
LT
1092 return -EINVAL;
1093
d658f8a0 1094 c = ipmr_cache_alloc();
c354e124 1095 if (c == NULL)
1da177e4
LT
1096 return -ENOMEM;
1097
c354e124
JK
1098 c->mfc_origin = mfc->mfcc_origin.s_addr;
1099 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1100 c->mfc_parent = mfc->mfcc_parent;
0c12295a 1101 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1da177e4
LT
1102 if (!mrtsock)
1103 c->mfc_flags |= MFC_STATIC;
1104
a8c9486b 1105 list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
1da177e4
LT
1106
1107 /*
1108 * Check to see if we resolved a queued (unresolved) entry. If so we
1109 * need to send on the frames and tidy up.
1110 */
b0ebb739 1111 found = false;
1da177e4 1112 spin_lock_bh(&mfc_unres_lock);
0c12295a 1113 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
e258beb2 1114 if (uc->mfc_origin == c->mfc_origin &&
1da177e4 1115 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
862465f2 1116 list_del(&uc->list);
0c12295a 1117 atomic_dec(&mrt->cache_resolve_queue_len);
b0ebb739 1118 found = true;
1da177e4
LT
1119 break;
1120 }
1121 }
0c12295a
PM
1122 if (list_empty(&mrt->mfc_unres_queue))
1123 del_timer(&mrt->ipmr_expire_timer);
1da177e4
LT
1124 spin_unlock_bh(&mfc_unres_lock);
1125
b0ebb739 1126 if (found) {
0c12295a 1127 ipmr_cache_resolve(net, mrt, uc, c);
5c0a66f5 1128 ipmr_cache_free(uc);
1da177e4
LT
1129 }
1130 return 0;
1131}
1132
1133/*
1134 * Close the multicast socket, and clear the vif tables etc
1135 */
e905a9ed 1136
0c12295a 1137static void mroute_clean_tables(struct mr_table *mrt)
1da177e4
LT
1138{
1139 int i;
d17fa6fa 1140 LIST_HEAD(list);
862465f2 1141 struct mfc_cache *c, *next;
e905a9ed 1142
1da177e4
LT
1143 /*
1144 * Shut down all active vif entries
1145 */
0c12295a
PM
1146 for (i = 0; i < mrt->maxvif; i++) {
1147 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1148 vif_delete(mrt, i, 0, &list);
1da177e4 1149 }
d17fa6fa 1150 unregister_netdevice_many(&list);
1da177e4
LT
1151
1152 /*
1153 * Wipe the cache
1154 */
862465f2 1155 for (i = 0; i < MFC_LINES; i++) {
0c12295a 1156 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
a8c9486b 1157 if (c->mfc_flags & MFC_STATIC)
1da177e4 1158 continue;
a8c9486b 1159 list_del_rcu(&c->list);
5c0a66f5 1160 ipmr_cache_free(c);
1da177e4
LT
1161 }
1162 }
1163
0c12295a 1164 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1da177e4 1165 spin_lock_bh(&mfc_unres_lock);
0c12295a 1166 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
862465f2 1167 list_del(&c->list);
0c12295a 1168 ipmr_destroy_unres(mrt, c);
1da177e4
LT
1169 }
1170 spin_unlock_bh(&mfc_unres_lock);
1171 }
1172}
1173
4c968709
ED
1174/* called from ip_ra_control(), before an RCU grace period,
1175 * we don't need to call synchronize_rcu() here
1176 */
1da177e4
LT
1177static void mrtsock_destruct(struct sock *sk)
1178{
4feb88e5 1179 struct net *net = sock_net(sk);
f0ad0860 1180 struct mr_table *mrt;
4feb88e5 1181
1da177e4 1182 rtnl_lock();
f0ad0860 1183 ipmr_for_each_table(mrt, net) {
4c968709 1184 if (sk == rtnl_dereference(mrt->mroute_sk)) {
f0ad0860 1185 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
4c968709 1186 rcu_assign_pointer(mrt->mroute_sk, NULL);
f0ad0860
PM
1187 mroute_clean_tables(mrt);
1188 }
1da177e4
LT
1189 }
1190 rtnl_unlock();
1191}
1192
1193/*
1194 * Socket options and virtual interface manipulation. The whole
1195 * virtual interface system is a complete heap, but unfortunately
1196 * that's how BSD mrouted happens to think. Maybe one day with a proper
1197 * MOSPF/PIM router set up we can clean this up.
1198 */
e905a9ed 1199
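For reference, the userspace half of this contract: the daemon opens a raw IGMP socket, optionally picks a table (multiple-tables kernels only, and necessarily before MRT_INIT, as the MRT_TABLE case below enforces), then becomes the mrouter. A rough sketch with placeholder values, error handling omitted:

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <linux/mroute.h>

    int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
    int one = 1;
    unsigned int table = 42;                /* example table id */

    /* CONFIG_IP_MROUTE_MULTIPLE_TABLES only; must precede MRT_INIT */
    setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table));
    setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));

    struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
    vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");  /* example address */
    setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));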
b7058842 1200int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1da177e4
LT
1201{
1202 int ret;
1203 struct vifctl vif;
1204 struct mfcctl mfc;
4feb88e5 1205 struct net *net = sock_net(sk);
f0ad0860
PM
1206 struct mr_table *mrt;
1207
1208 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1209 if (mrt == NULL)
1210 return -ENOENT;
e905a9ed 1211
132adf54 1212 if (optname != MRT_INIT) {
4c968709
ED
1213 if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
1214 !capable(CAP_NET_ADMIN))
1da177e4
LT
1215 return -EACCES;
1216 }
1217
132adf54
SH
1218 switch (optname) {
1219 case MRT_INIT:
1220 if (sk->sk_type != SOCK_RAW ||
c720c7e8 1221 inet_sk(sk)->inet_num != IPPROTO_IGMP)
132adf54 1222 return -EOPNOTSUPP;
c354e124 1223 if (optlen != sizeof(int))
132adf54 1224 return -ENOPROTOOPT;
1da177e4 1225
132adf54 1226 rtnl_lock();
4c968709 1227 if (rtnl_dereference(mrt->mroute_sk)) {
1da177e4 1228 rtnl_unlock();
132adf54
SH
1229 return -EADDRINUSE;
1230 }
1231
1232 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1233 if (ret == 0) {
4c968709 1234 rcu_assign_pointer(mrt->mroute_sk, sk);
4feb88e5 1235 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
132adf54
SH
1236 }
1237 rtnl_unlock();
1238 return ret;
1239 case MRT_DONE:
4c968709 1240 if (sk != rcu_dereference_raw(mrt->mroute_sk))
132adf54
SH
1241 return -EACCES;
1242 return ip_ra_control(sk, 0, NULL);
1243 case MRT_ADD_VIF:
1244 case MRT_DEL_VIF:
c354e124 1245 if (optlen != sizeof(vif))
132adf54 1246 return -EINVAL;
c354e124 1247 if (copy_from_user(&vif, optval, sizeof(vif)))
132adf54
SH
1248 return -EFAULT;
1249 if (vif.vifc_vifi >= MAXVIFS)
1250 return -ENFILE;
1251 rtnl_lock();
c354e124 1252 if (optname == MRT_ADD_VIF) {
4c968709
ED
1253 ret = vif_add(net, mrt, &vif,
1254 sk == rtnl_dereference(mrt->mroute_sk));
132adf54 1255 } else {
0c12295a 1256 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
132adf54
SH
1257 }
1258 rtnl_unlock();
1259 return ret;
1da177e4
LT
1260
1261 /*
1262 * Manipulate the forwarding caches. These live
1263 * in a sort of kernel/user symbiosis.
1264 */
132adf54
SH
1265 case MRT_ADD_MFC:
1266 case MRT_DEL_MFC:
c354e124 1267 if (optlen != sizeof(mfc))
132adf54 1268 return -EINVAL;
c354e124 1269 if (copy_from_user(&mfc, optval, sizeof(mfc)))
132adf54
SH
1270 return -EFAULT;
1271 rtnl_lock();
c354e124 1272 if (optname == MRT_DEL_MFC)
0c12295a 1273 ret = ipmr_mfc_delete(mrt, &mfc);
132adf54 1274 else
4c968709
ED
1275 ret = ipmr_mfc_add(net, mrt, &mfc,
1276 sk == rtnl_dereference(mrt->mroute_sk));
132adf54
SH
1277 rtnl_unlock();
1278 return ret;
1da177e4
LT
1279 /*
1280 * Control PIM assert.
1281 */
132adf54
SH
1282 case MRT_ASSERT:
1283 {
1284 int v;
1285 if (get_user(v,(int __user *)optval))
1286 return -EFAULT;
0c12295a 1287 mrt->mroute_do_assert = (v) ? 1 : 0;
132adf54
SH
1288 return 0;
1289 }
1da177e4 1290#ifdef CONFIG_IP_PIMSM
132adf54
SH
1291 case MRT_PIM:
1292 {
ba93ef74
SH
1293 int v;
1294
132adf54
SH
1295 if (get_user(v,(int __user *)optval))
1296 return -EFAULT;
ba93ef74
SH
1297 v = (v) ? 1 : 0;
1298
132adf54
SH
1299 rtnl_lock();
1300 ret = 0;
0c12295a
PM
1301 if (v != mrt->mroute_do_pim) {
1302 mrt->mroute_do_pim = v;
1303 mrt->mroute_do_assert = v;
1da177e4 1304 }
132adf54
SH
1305 rtnl_unlock();
1306 return ret;
1307 }
f0ad0860
PM
1308#endif
1309#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1310 case MRT_TABLE:
1311 {
1312 u32 v;
1313
1314 if (optlen != sizeof(u32))
1315 return -EINVAL;
1316 if (get_user(v, (u32 __user *)optval))
1317 return -EFAULT;
f0ad0860
PM
1318
1319 rtnl_lock();
1320 ret = 0;
4c968709
ED
1321 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1322 ret = -EBUSY;
1323 } else {
1324 if (!ipmr_new_table(net, v))
1325 ret = -ENOMEM;
1326 raw_sk(sk)->ipmr_table = v;
1327 }
f0ad0860
PM
1328 rtnl_unlock();
1329 return ret;
1330 }
1da177e4 1331#endif
132adf54
SH
1332 /*
1333 * Spurious command, or MRT_VERSION which you cannot
1334 * set.
1335 */
1336 default:
1337 return -ENOPROTOOPT;
1da177e4
LT
1338 }
1339}
1340
1341/*
1342 * Getsockopt support for the multicast routing system.
1343 */
e905a9ed 1344
c354e124 1345int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1da177e4
LT
1346{
1347 int olr;
1348 int val;
4feb88e5 1349 struct net *net = sock_net(sk);
f0ad0860
PM
1350 struct mr_table *mrt;
1351
1352 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1353 if (mrt == NULL)
1354 return -ENOENT;
1da177e4 1355
c354e124 1356 if (optname != MRT_VERSION &&
1da177e4
LT
1357#ifdef CONFIG_IP_PIMSM
1358 optname!=MRT_PIM &&
1359#endif
1360 optname!=MRT_ASSERT)
1361 return -ENOPROTOOPT;
1362
1363 if (get_user(olr, optlen))
1364 return -EFAULT;
1365
1366 olr = min_t(unsigned int, olr, sizeof(int));
1367 if (olr < 0)
1368 return -EINVAL;
e905a9ed 1369
c354e124 1370 if (put_user(olr, optlen))
1da177e4 1371 return -EFAULT;
c354e124
JK
1372 if (optname == MRT_VERSION)
1373 val = 0x0305;
1da177e4 1374#ifdef CONFIG_IP_PIMSM
c354e124 1375 else if (optname == MRT_PIM)
0c12295a 1376 val = mrt->mroute_do_pim;
1da177e4
LT
1377#endif
1378 else
0c12295a 1379 val = mrt->mroute_do_assert;
c354e124 1380 if (copy_to_user(optval, &val, olr))
1da177e4
LT
1381 return -EFAULT;
1382 return 0;
1383}
1384
1385/*
1386 * The IP multicast ioctl support routines.
1387 */
e905a9ed 1388
1da177e4
LT
1389int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1390{
1391 struct sioc_sg_req sr;
1392 struct sioc_vif_req vr;
1393 struct vif_device *vif;
1394 struct mfc_cache *c;
4feb88e5 1395 struct net *net = sock_net(sk);
f0ad0860
PM
1396 struct mr_table *mrt;
1397
1398 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1399 if (mrt == NULL)
1400 return -ENOENT;
e905a9ed 1401
132adf54
SH
1402 switch (cmd) {
1403 case SIOCGETVIFCNT:
c354e124 1404 if (copy_from_user(&vr, arg, sizeof(vr)))
132adf54 1405 return -EFAULT;
0c12295a 1406 if (vr.vifi >= mrt->maxvif)
132adf54
SH
1407 return -EINVAL;
1408 read_lock(&mrt_lock);
0c12295a
PM
1409 vif = &mrt->vif_table[vr.vifi];
1410 if (VIF_EXISTS(mrt, vr.vifi)) {
c354e124
JK
1411 vr.icount = vif->pkt_in;
1412 vr.ocount = vif->pkt_out;
1413 vr.ibytes = vif->bytes_in;
1414 vr.obytes = vif->bytes_out;
1da177e4 1415 read_unlock(&mrt_lock);
1da177e4 1416
c354e124 1417 if (copy_to_user(arg, &vr, sizeof(vr)))
132adf54
SH
1418 return -EFAULT;
1419 return 0;
1420 }
1421 read_unlock(&mrt_lock);
1422 return -EADDRNOTAVAIL;
1423 case SIOCGETSGCNT:
c354e124 1424 if (copy_from_user(&sr, arg, sizeof(sr)))
132adf54
SH
1425 return -EFAULT;
1426
a8c9486b 1427 rcu_read_lock();
0c12295a 1428 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
132adf54
SH
1429 if (c) {
1430 sr.pktcnt = c->mfc_un.res.pkt;
1431 sr.bytecnt = c->mfc_un.res.bytes;
1432 sr.wrong_if = c->mfc_un.res.wrong_if;
a8c9486b 1433 rcu_read_unlock();
132adf54 1434
c354e124 1435 if (copy_to_user(arg, &sr, sizeof(sr)))
132adf54
SH
1436 return -EFAULT;
1437 return 0;
1438 }
a8c9486b 1439 rcu_read_unlock();
132adf54
SH
1440 return -EADDRNOTAVAIL;
1441 default:
1442 return -ENOIOCTLCMD;
1da177e4
LT
1443 }
1444}
1445
1446
1447static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1448{
e9dc8653 1449 struct net_device *dev = ptr;
4feb88e5 1450 struct net *net = dev_net(dev);
f0ad0860 1451 struct mr_table *mrt;
1da177e4
LT
1452 struct vif_device *v;
1453 int ct;
d17fa6fa 1454 LIST_HEAD(list);
e9dc8653 1455
1da177e4
LT
1456 if (event != NETDEV_UNREGISTER)
1457 return NOTIFY_DONE;
f0ad0860
PM
1458
1459 ipmr_for_each_table(mrt, net) {
1460 v = &mrt->vif_table[0];
1461 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1462 if (v->dev == dev)
1463 vif_delete(mrt, ct, 1, &list);
1464 }
1da177e4 1465 }
d17fa6fa 1466 unregister_netdevice_many(&list);
1da177e4
LT
1467 return NOTIFY_DONE;
1468}
1469
1470
c354e124 1471static struct notifier_block ip_mr_notifier = {
1da177e4
LT
1472 .notifier_call = ipmr_device_event,
1473};
1474
1475/*
1476 * Encapsulate a packet by attaching a valid IPIP header to it.
1477 * This avoids tunnel drivers and other mess and gives us the speed so
1478 * important for multicast video.
1479 */
e905a9ed 1480
114c7844 1481static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1da177e4 1482{
8856dfa3 1483 struct iphdr *iph;
eddc9ec5 1484 struct iphdr *old_iph = ip_hdr(skb);
8856dfa3
ACM
1485
1486 skb_push(skb, sizeof(struct iphdr));
b0e380b1 1487 skb->transport_header = skb->network_header;
8856dfa3 1488 skb_reset_network_header(skb);
eddc9ec5 1489 iph = ip_hdr(skb);
1da177e4
LT
1490
1491 iph->version = 4;
e023dd64
ACM
1492 iph->tos = old_iph->tos;
1493 iph->ttl = old_iph->ttl;
1da177e4
LT
1494 iph->frag_off = 0;
1495 iph->daddr = daddr;
1496 iph->saddr = saddr;
1497 iph->protocol = IPPROTO_IPIP;
1498 iph->ihl = 5;
1499 iph->tot_len = htons(skb->len);
adf30907 1500 ip_select_ident(iph, skb_dst(skb), NULL);
1da177e4
LT
1501 ip_send_check(iph);
1502
1da177e4
LT
1503 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1504 nf_reset(skb);
1505}
1506
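The resulting layout (an illustration of the function above, not part of the file):

    +---------------------------+----------------------+---------+
    | new outer IPv4 header     | original IPv4 header | payload |
    | proto = IPPROTO_IPIP,     | (left untouched)     |         |
    | tos/ttl copied from inner |                      |         |
    +---------------------------+----------------------+---------+

The outer tot_len covers the whole skb, and ip_send_check() recomputes only the outer header checksum.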
1507static inline int ipmr_forward_finish(struct sk_buff *skb)
1508{
1509 struct ip_options * opt = &(IPCB(skb)->opt);
1510
adf30907 1511 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1da177e4
LT
1512
1513 if (unlikely(opt->optlen))
1514 ip_forward_options(skb);
1515
1516 return dst_output(skb);
1517}
1518
1519/*
1520 * Processing handlers for ipmr_forward
1521 */
1522
0c12295a
PM
1523static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1524 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1da177e4 1525{
eddc9ec5 1526 const struct iphdr *iph = ip_hdr(skb);
0c12295a 1527 struct vif_device *vif = &mrt->vif_table[vifi];
1da177e4
LT
1528 struct net_device *dev;
1529 struct rtable *rt;
1530 int encap = 0;
1531
1532 if (vif->dev == NULL)
1533 goto out_free;
1534
1535#ifdef CONFIG_IP_PIMSM
1536 if (vif->flags & VIFF_REGISTER) {
1537 vif->pkt_out++;
c354e124 1538 vif->bytes_out += skb->len;
cf3677ae
PE
1539 vif->dev->stats.tx_bytes += skb->len;
1540 vif->dev->stats.tx_packets++;
0c12295a 1541 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
69ebbf58 1542 goto out_free;
1da177e4
LT
1543 }
1544#endif
1545
1546 if (vif->flags&VIFF_TUNNEL) {
1547 struct flowi fl = { .oif = vif->link,
1548 .nl_u = { .ip4_u =
1549 { .daddr = vif->remote,
1550 .saddr = vif->local,
1551 .tos = RT_TOS(iph->tos) } },
1552 .proto = IPPROTO_IPIP };
4feb88e5 1553 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1554 goto out_free;
1555 encap = sizeof(struct iphdr);
1556 } else {
1557 struct flowi fl = { .oif = vif->link,
1558 .nl_u = { .ip4_u =
1559 { .daddr = iph->daddr,
1560 .tos = RT_TOS(iph->tos) } },
1561 .proto = IPPROTO_IPIP };
4feb88e5 1562 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1563 goto out_free;
1564 }
1565
d8d1f30b 1566 dev = rt->dst.dev;
1da177e4 1567
d8d1f30b 1568 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1da177e4
LT
1569 /* Do not fragment multicasts. Alas, IPv4 does not
1570 allow sending ICMP here, so such packets simply
1571 disappear into a black hole.
1572 */
1573
7c73a6fa 1574 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
1575 ip_rt_put(rt);
1576 goto out_free;
1577 }
1578
d8d1f30b 1579 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1da177e4
LT
1580
1581 if (skb_cow(skb, encap)) {
e905a9ed 1582 ip_rt_put(rt);
1da177e4
LT
1583 goto out_free;
1584 }
1585
1586 vif->pkt_out++;
c354e124 1587 vif->bytes_out += skb->len;
1da177e4 1588
adf30907 1589 skb_dst_drop(skb);
d8d1f30b 1590 skb_dst_set(skb, &rt->dst);
eddc9ec5 1591 ip_decrease_ttl(ip_hdr(skb));
1da177e4
LT
1592
1593 /* FIXME: forward and output firewalls used to be called here.
1594 * What do we do with netfilter? -- RR */
1595 if (vif->flags & VIFF_TUNNEL) {
1596 ip_encap(skb, vif->local, vif->remote);
1597 /* FIXME: extra output firewall step used to be here. --RR */
2f4c02d4
PE
1598 vif->dev->stats.tx_packets++;
1599 vif->dev->stats.tx_bytes += skb->len;
1da177e4
LT
1600 }
1601
1602 IPCB(skb)->flags |= IPSKB_FORWARDED;
1603
1604 /*
1605 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
1606 * not only before forwarding, but after forwarding on all output
1607 * interfaces. Clearly, if the mrouter runs a multicasting
1608 * program, it should receive packets regardless of the interface
1609 * the program is joined on.
1610 * If we don't do this, the program will have to join on all
1611 * interfaces. On the other hand, a multihomed host (or router, but
1612 * not mrouter) cannot join on more than one interface - it would
1613 * result in receiving multiple packets.
1614 */
9bbc768a 1615 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1da177e4
LT
1616 ipmr_forward_finish);
1617 return;
1618
1619out_free:
1620 kfree_skb(skb);
1da177e4
LT
1621}
1622
0c12295a 1623static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1da177e4
LT
1624{
1625 int ct;
0c12295a
PM
1626
1627 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1628 if (mrt->vif_table[ct].dev == dev)
1da177e4
LT
1629 break;
1630 }
1631 return ct;
1632}
1633
1634/* "local" means that we should preserve one skb (for local delivery) */
1635
0c12295a
PM
1636static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1637 struct sk_buff *skb, struct mfc_cache *cache,
1638 int local)
1da177e4
LT
1639{
1640 int psend = -1;
1641 int vif, ct;
1642
1643 vif = cache->mfc_parent;
1644 cache->mfc_un.res.pkt++;
1645 cache->mfc_un.res.bytes += skb->len;
1646
1647 /*
1648 * Wrong interface: drop packet and (maybe) send PIM assert.
1649 */
0c12295a 1650 if (mrt->vif_table[vif].dev != skb->dev) {
1da177e4
LT
1651 int true_vifi;
1652
511c3f92 1653 if (skb_rtable(skb)->fl.iif == 0) {
1da177e4
LT
1654 /* It is our own packet, looped back.
1655 Very complicated situation...
1656
1657 The best workaround until routing daemons are
1658 fixed is not to redistribute a packet if it was
1659 sent through the wrong interface. This means that
1660 multicast applications WILL NOT work for
1661 (S,G) entries whose default multicast route points
1662 to the wrong oif. In any case, it is not a good
1663 idea to run multicasting applications on a router.
1664 */
1665 goto dont_forward;
1666 }
1667
1668 cache->mfc_un.res.wrong_if++;
0c12295a 1669 true_vifi = ipmr_find_vif(mrt, skb->dev);
1da177e4 1670
0c12295a 1671 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1da177e4
LT
1672 /* pimsm uses asserts when switching from RPT to SPT,
1673 so we cannot check that the packet arrived on an oif.
1674 That is bad, but otherwise we would need to move a pretty
1675 large chunk of pimd into the kernel. Ough... --ANK
1676 */
0c12295a 1677 (mrt->mroute_do_pim ||
6f9374a9 1678 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 1679 time_after(jiffies,
1da177e4
LT
1680 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1681 cache->mfc_un.res.last_assert = jiffies;
0c12295a 1682 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
1683 }
1684 goto dont_forward;
1685 }
1686
0c12295a
PM
1687 mrt->vif_table[vif].pkt_in++;
1688 mrt->vif_table[vif].bytes_in += skb->len;
1da177e4
LT
1689
1690 /*
1691 * Forward the frame
1692 */
1693 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
eddc9ec5 1694 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
1695 if (psend != -1) {
1696 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1697 if (skb2)
0c12295a
PM
1698 ipmr_queue_xmit(net, mrt, skb2, cache,
1699 psend);
1da177e4 1700 }
c354e124 1701 psend = ct;
1da177e4
LT
1702 }
1703 }
1704 if (psend != -1) {
1705 if (local) {
1706 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1707 if (skb2)
0c12295a 1708 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1da177e4 1709 } else {
0c12295a 1710 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1da177e4
LT
1711 return 0;
1712 }
1713 }
1714
1715dont_forward:
1716 if (!local)
1717 kfree_skb(skb);
1718 return 0;
1719}
1720
1721
1722/*
1723 * Multicast packets for forwarding arrive here
4c968709 1724 * Called with rcu_read_lock();
1da177e4
LT
1725 */
1726
1727int ip_mr_input(struct sk_buff *skb)
1728{
1729 struct mfc_cache *cache;
4feb88e5 1730 struct net *net = dev_net(skb->dev);
511c3f92 1731 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
f0ad0860
PM
1732 struct mr_table *mrt;
1733 int err;
1da177e4
LT
1734
1735 /* A packet looped back after forwarding should not be
1736 forwarded a second time, but it can still be delivered locally.
1737 */
4c968709 1738 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1da177e4
LT
1739 goto dont_forward;
1740
f0ad0860 1741 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
e40dbc51
BG
1742 if (err < 0) {
1743 kfree_skb(skb);
f0ad0860 1744 return err;
e40dbc51 1745 }
f0ad0860 1746
1da177e4 1747 if (!local) {
4c968709
ED
1748 if (IPCB(skb)->opt.router_alert) {
1749 if (ip_call_ra_chain(skb))
1750 return 0;
1751 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1752 /* IGMPv1 (and broken IGMPv2 implementations such as
1753 * Cisco IOS <= 11.2(8)) do not put the router alert
1754 * option in IGMP packets destined to routable
1755 * groups. This is very bad, because it means
1756 * that we can forward NO IGMP messages.
1757 */
1758 struct sock *mroute_sk;
1759
1760 mroute_sk = rcu_dereference(mrt->mroute_sk);
1761 if (mroute_sk) {
1762 nf_reset(skb);
1763 raw_rcv(mroute_sk, skb);
1764 return 0;
1765 }
1da177e4
LT
1766 }
1767 }
1768
a8c9486b 1769 /* already under rcu_read_lock() */
0c12295a 1770 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1da177e4
LT
1771
1772 /*
1773 * No usable cache entry
1774 */
c354e124 1775 if (cache == NULL) {
1da177e4
LT
1776 int vif;
1777
1778 if (local) {
1779 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1780 ip_local_deliver(skb);
a8c9486b 1781 if (skb2 == NULL)
1da177e4 1782 return -ENOBUFS;
1da177e4
LT
1783 skb = skb2;
1784 }
1785
a8c9486b 1786 read_lock(&mrt_lock);
0c12295a 1787 vif = ipmr_find_vif(mrt, skb->dev);
1da177e4 1788 if (vif >= 0) {
0eae88f3 1789 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1da177e4
LT
1790 read_unlock(&mrt_lock);
1791
0eae88f3 1792 return err2;
1da177e4
LT
1793 }
1794 read_unlock(&mrt_lock);
1795 kfree_skb(skb);
1796 return -ENODEV;
1797 }
1798
a8c9486b 1799 read_lock(&mrt_lock);
0c12295a 1800 ip_mr_forward(net, mrt, skb, cache, local);
1da177e4
LT
1801 read_unlock(&mrt_lock);
1802
1803 if (local)
1804 return ip_local_deliver(skb);
1805
1806 return 0;
1807
1808dont_forward:
1809 if (local)
1810 return ip_local_deliver(skb);
1811 kfree_skb(skb);
1812 return 0;
1813}
1814
b1879204 1815#ifdef CONFIG_IP_PIMSM
55747a0a 1816/* called with rcu_read_lock() */
f0ad0860
PM
1817static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1818 unsigned int pimlen)
1da177e4 1819{
b1879204
IJ
1820 struct net_device *reg_dev = NULL;
1821 struct iphdr *encap;
1da177e4 1822
b1879204 1823 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1da177e4
LT
1824 /*
1825 Check that:
1826 a. packet is really destined to a multicast group
1827 b. packet is not a NULL-REGISTER
1828 c. packet is not truncated
1829 */
f97c1e0c 1830 if (!ipv4_is_multicast(encap->daddr) ||
1da177e4 1831 encap->tot_len == 0 ||
b1879204
IJ
1832 ntohs(encap->tot_len) + pimlen > skb->len)
1833 return 1;
1da177e4
LT
1834
1835 read_lock(&mrt_lock);
0c12295a
PM
1836 if (mrt->mroute_reg_vif_num >= 0)
1837 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1da177e4
LT
1838 read_unlock(&mrt_lock);
1839
e905a9ed 1840 if (reg_dev == NULL)
b1879204 1841 return 1;
1da177e4 1842
b0e380b1 1843 skb->mac_header = skb->network_header;
55747a0a 1844 skb_pull(skb, (u8 *)encap - skb->data);
31c7711b 1845 skb_reset_network_header(skb);
1da177e4 1846 skb->protocol = htons(ETH_P_IP);
55747a0a 1847 skb->ip_summed = CHECKSUM_NONE;
1da177e4 1848 skb->pkt_type = PACKET_HOST;
d19d56dd
ED
1849
1850 skb_tunnel_rx(skb, reg_dev);
1851
1da177e4 1852 netif_rx(skb);
b1879204 1853
55747a0a 1854 return NET_RX_SUCCESS;
b1879204
IJ
1855}
1856#endif
1857
1858#ifdef CONFIG_IP_PIMSM_V1
1859/*
1860 * Handle IGMP messages of PIMv1
1861 */
1862
1863int pim_rcv_v1(struct sk_buff * skb)
1864{
1865 struct igmphdr *pim;
4feb88e5 1866 struct net *net = dev_net(skb->dev);
f0ad0860 1867 struct mr_table *mrt;
b1879204
IJ
1868
1869 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1870 goto drop;
1871
1872 pim = igmp_hdr(skb);
1873
f0ad0860
PM
1874 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1875 goto drop;
1876
0c12295a 1877 if (!mrt->mroute_do_pim ||
b1879204
IJ
1878 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1879 goto drop;
1880
f0ad0860 1881 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
1882drop:
1883 kfree_skb(skb);
1884 }
1da177e4
LT
1885 return 0;
1886}
1887#endif
1888
#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
        struct pimreghdr *pim;
        struct net *net = dev_net(skb->dev);
        struct mr_table *mrt;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
                goto drop;

        pim = (struct pimreghdr *)skb_transport_header(skb);
        if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
            (pim->flags & PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
                goto drop;

        if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
                goto drop;

        if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
                kfree_skb(skb);
        }
        return 0;
}
#endif

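/*
 * Note on the checksum test in pim_rcv(): the cheap ip_compute_csum()
 * over the fixed-size PIM header is tried first; only if it is nonzero is
 * the checksum recomputed across the whole packet, which also accepts
 * peers that checksum the encapsulated data.  Both sums must fail before
 * the REGISTER is dropped.
 */
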
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                              struct mfc_cache *c, struct rtmsg *rtm)
{
        int ct;
        struct rtnexthop *nhp;
        u8 *b = skb_tail_pointer(skb);
        struct rtattr *mp_head;

        /* If cache is unresolved, don't try to parse IIF and OIF */
        if (c->mfc_parent >= MAXVIFS)
                return -ENOENT;

        if (VIF_EXISTS(mrt, c->mfc_parent))
                RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

        mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
                if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
                                goto rtattr_failure;
                        nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
                        nhp->rtnh_flags = 0;
                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
                        nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
                        nhp->rtnh_len = sizeof(*nhp);
                }
        }
        mp_head->rta_type = RTA_MULTIPATH;
        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
        rtm->rtm_type = RTN_MULTICAST;
        return 1;

rtattr_failure:
        nlmsg_trim(skb, b);
        return -EMSGSIZE;
}

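/*
 * Attribute layout produced by __ipmr_fill_mroute() (sketch):
 *
 *      RTA_IIF        4-byte ifindex of the parent (input) vif
 *      RTA_MULTIPATH  array of struct rtnexthop, one per output vif:
 *                         rtnh_ifindex = vif device ifindex
 *                         rtnh_hops    = TTL threshold for that vif
 *
 * i.e. the multicast oif list reuses the RTA_MULTIPATH encoding that
 * unicast multipath routes use.
 */
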
int ipmr_get_route(struct net *net,
                   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
        int err;
        struct mr_table *mrt;
        struct mfc_cache *cache;
        struct rtable *rt = skb_rtable(skb);

        mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return -ENOENT;

        rcu_read_lock();
        cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

        if (cache == NULL) {
                struct sk_buff *skb2;
                struct iphdr *iph;
                struct net_device *dev;
                int vif;

                if (nowait) {
                        rcu_read_unlock();
                        return -EAGAIN;
                }

                dev = skb->dev;
                read_lock(&mrt_lock);
                if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
                        read_unlock(&mrt_lock);
                        rcu_read_unlock();
                        return -ENODEV;
                }
                skb2 = skb_clone(skb, GFP_ATOMIC);
                if (!skb2) {
                        read_unlock(&mrt_lock);
                        rcu_read_unlock();
                        return -ENOMEM;
                }

                skb_push(skb2, sizeof(struct iphdr));
                skb_reset_network_header(skb2);
                iph = ip_hdr(skb2);
                iph->ihl = sizeof(struct iphdr) >> 2;
                iph->saddr = rt->rt_src;
                iph->daddr = rt->rt_dst;
                iph->version = 0;
                err = ipmr_cache_unresolved(mrt, vif, skb2);
                read_unlock(&mrt_lock);
                rcu_read_unlock();
                return err;
        }

        read_lock(&mrt_lock);
        if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
                cache->mfc_flags |= MFC_NOTIFY;
        err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
        read_unlock(&mrt_lock);
        rcu_read_unlock();
        return err;
}

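/*
 * ipmr_get_route() is the hook the unicast rtnetlink code calls when an
 * RTM_GETROUTE answer resolves to a multicast route, e.g. a query like
 * "ip route get 224.2.2.2 from 10.0.0.1 iif eth0" (illustrative).  For an
 * unresolved entry it fabricates a minimal IP header on a clone
 * (iph->version = 0 flags the header as synthetic) and queues it via
 * ipmr_cache_unresolved(), unless the caller passed nowait.
 */
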
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                            u32 pid, u32 seq, struct mfc_cache *c)
{
        struct nlmsghdr *nlh;
        struct rtmsg *rtm;

        nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
        if (nlh == NULL)
                return -EMSGSIZE;

        rtm = nlmsg_data(nlh);
        rtm->rtm_family = RTNL_FAMILY_IPMR;
        rtm->rtm_dst_len = 32;
        rtm->rtm_src_len = 32;
        rtm->rtm_tos = 0;
        rtm->rtm_table = mrt->id;
        NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
        rtm->rtm_type = RTN_MULTICAST;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        rtm->rtm_protocol = RTPROT_UNSPEC;
        rtm->rtm_flags = 0;

        NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
        NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

        if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
                goto nla_put_failure;

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct mr_table *mrt;
        struct mfc_cache *mfc;
        unsigned int t = 0, s_t;
        unsigned int h = 0, s_h;
        unsigned int e = 0, s_e;

        s_t = cb->args[0];
        s_h = cb->args[1];
        s_e = cb->args[2];

        rcu_read_lock();
        ipmr_for_each_table(mrt, net) {
                if (t < s_t)
                        goto next_table;
                if (t > s_t)
                        s_h = 0;
                for (h = s_h; h < MFC_LINES; h++) {
                        list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
                                if (e < s_e)
                                        goto next_entry;
                                if (ipmr_fill_mroute(mrt, skb,
                                                     NETLINK_CB(cb->skb).pid,
                                                     cb->nlh->nlmsg_seq,
                                                     mfc) < 0)
                                        goto done;
next_entry:
                                e++;
                        }
                        e = s_e = 0;
                }
                s_h = 0;
next_table:
                t++;
        }
done:
        rcu_read_unlock();

        cb->args[2] = e;
        cb->args[1] = h;
        cb->args[0] = t;

        return skb->len;
}

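/*
 * Userspace can walk the MFC over rtnetlink instead of parsing /proc: a
 * dump request whose rtgen_family is RTNL_FAMILY_IPMR is dispatched to
 * ipmr_rtm_dumproute().  Minimal sketch (illustrative, error handling
 * omitted; nl_fd is an already-bound NETLINK_ROUTE socket):
 *
 *      struct {
 *              struct nlmsghdr nlh;
 *              struct rtgenmsg g;
 *      } req = {
 *              .nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 *              .nlh.nlmsg_type  = RTM_GETROUTE,
 *              .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *              .g.rtgen_family  = RTNL_FAMILY_IPMR,
 *      };
 *
 *      send(nl_fd, &req, req.nlh.nlmsg_len, 0);
 *
 * followed by recv() of the NLM_F_MULTI RTM_NEWROUTE stream until
 * NLMSG_DONE.  cb->args[] holds {table, hash line, entry} so the dump
 * resumes where it stopped across recv() calls.
 */
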
#ifdef CONFIG_PROC_FS
/*
 * The /proc interfaces to multicast routing:
 * /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
        struct seq_net_private p;
        struct mr_table *mrt;
        int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
                                           struct ipmr_vif_iter *iter,
                                           loff_t pos)
{
        struct mr_table *mrt = iter->mrt;

        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
                if (!VIF_EXISTS(mrt, iter->ct))
                        continue;
                if (pos-- == 0)
                        return &mrt->vif_table[iter->ct];
        }
        return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
{
        struct ipmr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return ERR_PTR(-ENOENT);

        iter->mrt = mrt;

        read_lock(&mrt_lock);
        return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
                    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct ipmr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt = iter->mrt;

        ++*pos;
        if (v == SEQ_START_TOKEN)
                return ipmr_vif_seq_idx(net, iter, 0);

        while (++iter->ct < mrt->maxvif) {
                if (!VIF_EXISTS(mrt, iter->ct))
                        continue;
                return &mrt->vif_table[iter->ct];
        }
        return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
        __releases(mrt_lock)
{
        read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
        struct ipmr_vif_iter *iter = seq->private;
        struct mr_table *mrt = iter->mrt;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
        } else {
                const struct vif_device *vif = v;
                const char *name = vif->dev ? vif->dev->name : "none";

                seq_printf(seq,
                           "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
                           vif - mrt->vif_table,
                           name, vif->bytes_in, vif->pkt_in,
                           vif->bytes_out, vif->pkt_out,
                           vif->flags, vif->local, vif->remote);
        }
        return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
        .next  = ipmr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &ipmr_vif_seq_ops,
                            sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_vif_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};

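/*
 * Example /proc/net/ip_mr_vif contents (illustrative values):
 *
 *      Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *       0 eth0           1500      10      3000      20 00000 0100000A 00000000
 *
 * Local/Remote are raw hex addresses and Flags is the VIFF_* mask, as
 * printed by ipmr_vif_seq_show() above.
 */
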
struct ipmr_mfc_iter {
        struct seq_net_private p;
        struct mr_table *mrt;
        struct list_head *cache;
        int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
                                          struct ipmr_mfc_iter *it, loff_t pos)
{
        struct mr_table *mrt = it->mrt;
        struct mfc_cache *mfc;

        rcu_read_lock();
        for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
                it->cache = &mrt->mfc_cache_array[it->ct];
                list_for_each_entry_rcu(mfc, it->cache, list)
                        if (pos-- == 0)
                                return mfc;
        }
        rcu_read_unlock();

        spin_lock_bh(&mfc_unres_lock);
        it->cache = &mrt->mfc_unres_queue;
        list_for_each_entry(mfc, it->cache, list)
                if (pos-- == 0)
                        return mfc;
        spin_unlock_bh(&mfc_unres_lock);

        it->cache = NULL;
        return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return ERR_PTR(-ENOENT);

        it->mrt = mrt;
        it->cache = NULL;
        it->ct = 0;
        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
                    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct mfc_cache *mfc = v;
        struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt = it->mrt;

        ++*pos;

        if (v == SEQ_START_TOKEN)
                return ipmr_mfc_seq_idx(net, seq->private, 0);

        if (mfc->list.next != it->cache)
                return list_entry(mfc->list.next, struct mfc_cache, list);

        if (it->cache == &mrt->mfc_unres_queue)
                goto end_of_list;

        BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

        while (++it->ct < MFC_LINES) {
                it->cache = &mrt->mfc_cache_array[it->ct];
                if (list_empty(it->cache))
                        continue;
                return list_first_entry(it->cache, struct mfc_cache, list);
        }

        /* exhausted cache_array, show unresolved */
        rcu_read_unlock();
        it->cache = &mrt->mfc_unres_queue;
        it->ct = 0;

        spin_lock_bh(&mfc_unres_lock);
        if (!list_empty(it->cache))
                return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
        spin_unlock_bh(&mfc_unres_lock);
        it->cache = NULL;

        return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
        struct ipmr_mfc_iter *it = seq->private;
        struct mr_table *mrt = it->mrt;

        if (it->cache == &mrt->mfc_unres_queue)
                spin_unlock_bh(&mfc_unres_lock);
        else if (it->cache == &mrt->mfc_cache_array[it->ct])
                rcu_read_unlock();
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
        int n;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
        } else {
                const struct mfc_cache *mfc = v;
                const struct ipmr_mfc_iter *it = seq->private;
                const struct mr_table *mrt = it->mrt;

                seq_printf(seq, "%08X %08X %-3hd",
                           (__force u32) mfc->mfc_mcastgrp,
                           (__force u32) mfc->mfc_origin,
                           mfc->mfc_parent);

                if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
                                   mfc->mfc_un.res.pkt,
                                   mfc->mfc_un.res.bytes,
                                   mfc->mfc_un.res.wrong_if);
                        for (n = mfc->mfc_un.res.minvif;
                             n < mfc->mfc_un.res.maxvif; n++) {
                                if (VIF_EXISTS(mrt, n) &&
                                    mfc->mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
                                                   " %2d:%-3d",
                                                   n, mfc->mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
                         * pkt, bytes and wrong_if values
                         */
                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
                }
                seq_putc(seq, '\n');
        }
        return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
        .next  = ipmr_mfc_seq_next,
        .stop  = ipmr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
                            sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_mfc_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
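
/*
 * Example /proc/net/ip_mr_cache contents (illustrative values):
 *
 *      Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *      E20202E2 0A00000A 0           12     1440     0  1:1  2:64
 *
 * Group and Origin are raw hex in network byte order, Iif is the parent
 * vif, and each Oifs entry is "vif:TTL-threshold".  Unresolved entries
 * print zero Pkts/Bytes/Wrong and no Oifs list.
 */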
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
        .handler  = pim_rcv,
        .netns_ok = 1,
};
#endif


/*
 * Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
        int err;

        err = ipmr_rules_init(net);
        if (err < 0)
                goto fail;

#ifdef CONFIG_PROC_FS
        err = -ENOMEM;
        if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
                goto proc_vif_fail;
        if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
                goto proc_cache_fail;
#endif
        return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
        proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
        ipmr_rules_exit(net);
#endif
fail:
        return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "ip_mr_cache");
        proc_net_remove(net, "ip_mr_vif");
#endif
        ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
        .init = ipmr_net_init,
        .exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
        int err;

        mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                       sizeof(struct mfc_cache),
                                       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                       NULL);
        if (!mrt_cachep)
                return -ENOMEM;

        err = register_pernet_subsys(&ipmr_net_ops);
        if (err)
                goto reg_pernet_fail;

        err = register_netdevice_notifier(&ip_mr_notifier);
        if (err)
                goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
        if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
                printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
                err = -EAGAIN;
                goto add_proto_fail;
        }
#endif
        rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
        return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
        unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
        unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
        kmem_cache_destroy(mrt_cachep);
        return err;
}
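
/*
 * The error unwind in ip_mr_init() mirrors the setup order in reverse:
 * a failed inet_add_protocol() unregisters the notifier, a failed
 * notifier registration unregisters the pernet subsys, and so on down to
 * kmem_cache_destroy(), so a partial init never leaks resources.
 */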