/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

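/*
 * Per-namespace multicast routing state. Each table holds its own VIF
 * array, resolved-entry hash and unresolved queue; without
 * CONFIG_IP_MROUTE_MULTIPLE_TABLES only the default table exists.
 */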
struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and protected
 * with the weak lock mrt_lock. The queue of unresolved entries is
 * protected with the strong spinlock mfc_unres_lock.
 *
 * In this case the data path is free of exclusive locks at all.
 */

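/*
 * In practice (a summary of the call sites below, not a new rule):
 * data-path readers take read_lock(&mrt_lock) only, while configuration
 * changes run under rtnl_lock plus write_lock_bh(&mrt_lock), and the
 * unresolved queue is always manipulated under mfc_unres_lock.
 */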
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	/* ipmr_new_table() linked mrt into mr_tables; unlink before freeing */
	list_del(&mrt->list);
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		kfree(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

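/*
 * Return the table with the given id, creating and initialising it if it
 * does not exist yet. Allocation is GFP_KERNEL, so this must run in
 * process context.
 */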
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

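/*
 * The pimreg pseudo-device: everything the PIM daemon transmits on it is
 * bounced straight back up to the daemon as an IGMPMSG_WHOLEPKT upcall,
 * so that it can build PIM REGISTER messages itself.
 */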
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

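/*
 * Add a virtual interface, as requested by MRT_ADD_VIF. Depending on the
 * flags this creates a PIM register device, a DVMRP tunnel, or attaches
 * an existing device (by index or address). Called under RTNL.
 */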
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && __in_dev_get_rtnl(dev) == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

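/* Look up a resolved (origin, group) entry. Caller holds mrt_lock. */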
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

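/*
 * The "bizarre scheme", in short: the upcall is queued on the mroute raw
 * socket as if it were an IGMP packet, but its header is really a struct
 * igmpmsg overlaying the IP header. The daemon tells upcalls apart from
 * genuine IGMP traffic by im_mbz, which is always zero here and never
 * zero in a real IGMP message.
 */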
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type =
	msg->im_msgtype = assert;
	igmp->code = 0;
	ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

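/*
 * A minimal userspace sketch of the expected call sequence (illustrative
 * only, error handling omitted):
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc = { .vifc_vifi = 0, ... };
 *	struct mfcctl mc = { .mfcc_parent = 0, ... };
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 * The daemon then read()s IGMPMSG_* upcalls from fd and answers them
 * with further MRT_ADD_MFC calls.
 */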
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	/* check for a negative length before the unsigned min_t() below,
	 * which would otherwise swallow it
	 */
	if (olr < 0)
		return -EINVAL;
	olr = min_t(unsigned int, olr, sizeof(int));

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

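/*
 * Netdevice notifier: when a device unregisters, drop every VIF that was
 * riding on it, across all tables in the namespace.
 */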
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

e905a9ed 1472
114c7844 1473static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1da177e4 1474{
8856dfa3 1475 struct iphdr *iph;
eddc9ec5 1476 struct iphdr *old_iph = ip_hdr(skb);
8856dfa3
ACM
1477
1478 skb_push(skb, sizeof(struct iphdr));
b0e380b1 1479 skb->transport_header = skb->network_header;
8856dfa3 1480 skb_reset_network_header(skb);
eddc9ec5 1481 iph = ip_hdr(skb);
1da177e4
LT
1482
1483 iph->version = 4;
e023dd64
ACM
1484 iph->tos = old_iph->tos;
1485 iph->ttl = old_iph->ttl;
1da177e4
LT
1486 iph->frag_off = 0;
1487 iph->daddr = daddr;
1488 iph->saddr = saddr;
1489 iph->protocol = IPPROTO_IPIP;
1490 iph->ihl = 5;
1491 iph->tot_len = htons(skb->len);
adf30907 1492 ip_select_ident(iph, skb_dst(skb), NULL);
1da177e4
LT
1493 ip_send_check(iph);
1494
1da177e4
LT
1495 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1496 nf_reset(skb);
1497}
1498
1499static inline int ipmr_forward_finish(struct sk_buff *skb)
1500{
1501 struct ip_options * opt = &(IPCB(skb)->opt);
1502
adf30907 1503 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1da177e4
LT
1504
1505 if (unlikely(opt->optlen))
1506 ip_forward_options(skb);
1507
1508 return dst_output(skb);
1509}
1510
/*
 *	Processing handlers for ipmr_forward
 */

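/*
 * Transmit one forwarded copy on VIF @vifi. Register VIFs bounce the
 * packet up to the daemon instead; tunnel VIFs get an IPIP header via
 * ip_encap(). The skb is consumed (sent or freed) in all cases.
 */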
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags & VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->dst.dev;

	if (skb->len + encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow sending ICMP here, so such packets will
		 * disappear into a black hole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear that if the mrouter runs a multicasting
	 * program, it should receive packets regardless of the interface
	 * the program is joined on.
	 * If we fail to do this, the program will have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join on more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons are
			 * fixed is not to redistribute a packet if it was
			 * sent through the wrong interface. It means that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
		     * It is bad, but otherwise we would need to move pretty
		     * large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}

/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	 * forwarded a second time, but can still be delivered locally.
	 */
	if (IPCB(skb)->flags & IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			 * Cisco IOS <= 11.2(8)) do not put the router alert
			 * option into IGMP packets destined to routable
			 * groups. This is very bad, because it means
			 * that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
/* Called with rcu_read_lock(). */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	 * Check that:
	 * a. packet is really destined to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	return NET_RX_SUCCESS;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

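/*
 * Fill RTA_IIF and the RTA_MULTIPATH nexthop list for a resolved cache
 * entry. Returns > 0 on success, -ENOENT if the entry is still
 * unresolved, and -EMSGSIZE if the skb ran out of room.
 */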
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

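/*
 * Report the multicast route for the skb's (rt_src, rt_dst) pair.
 * On a cache miss the flow cannot be attributed to a route yet, so a
 * clone carrying a minimal fake IP header (iph->version set to 0) is
 * queued for resolution via ipmr_cache_unresolved().
 */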
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

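/*
 * Build one RTM_NEWROUTE message for an MFC entry, as emitted by the
 * ipmr_rtm_dumproute() dump loop below.
 */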
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len = 32;
	rtm->rtm_src_len = 32;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

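/*
 * Netlink dump callback.  A dump may span several skbs, so the cursor
 * is saved in cb->args[] between calls: args[0] is the table index,
 * args[1] the hash bucket, args[2] the entry within the bucket.
 */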
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}

#ifdef CONFIG_PROC_FS
/*
 * The /proc interfaces to multicast routing:
 * /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

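/*
 * The vif walk skips table slots whose device pointer is NULL (vifs
 * that have been deleted); the whole walk runs under mrt_lock, taken
 * in seq_start and dropped in seq_stop.
 */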
static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

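/*
 * The MFC walk visits the resolved entries first (mfc_cache_array,
 * under mrt_lock) and then the unresolved queue (under
 * mfc_unres_lock).  it->cache records which list the cursor is on,
 * so that ipmr_mfc_seq_stop() can drop the matching lock.
 */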
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif

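/*
 * PIMv2 register messages arrive as their own IP protocol
 * (IPPROTO_PIM, protocol number 103); pim_rcv() is hooked in as its
 * inet protocol handler in ip_mr_init() below.
 */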
#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler = pim_rcv,
	.netns_ok = 1,
};
#endif

/*
 * Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

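/*
 * Module init: allocate the MFC cache slab, then register the
 * per-namespace state, the netdevice notifier, the PIMv2 protocol
 * handler and the RTM_GETROUTE dump hook.  Each failure path unwinds
 * exactly the steps that already succeeded.
 */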
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}