1/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
29#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
32#include <linux/capability.h>
33#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
49#include <linux/if_ether.h>
50#include <linux/slab.h>
51#include <net/net_namespace.h>
52#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
55#include <net/route.h>
56#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
65#include <net/netlink.h>
66#include <net/fib_rules.h>
67
68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69#define CONFIG_IP_PIMSM 1
70#endif
71
72struct mr_table {
73 struct list_head list;
74#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
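/*
 * Each mr_table is one multicast routing table. With
 * CONFIG_IP_MROUTE_MULTIPLE_TABLES the tables hang off net->ipv4.mr_tables
 * and are selected by fib rules; otherwise a single table lives in
 * net->ipv4.mrt. A minimal lookup-by-id sketch (matching the helpers below):
 *
 *	struct mr_table *mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
 *	if (mrt == NULL)
 *		return -ENOENT;
 */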
91
92struct ipmr_rule {
93 struct fib_rule common;
94};
95
96struct ipmr_result {
97 struct mr_table *mrt;
98};
99
100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock.
102 */
103
104static DEFINE_RWLOCK(mrt_lock);
105
106/*
107 * Multicast router control variables
108 */
109
110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111
112/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock);
114
115/* We return to original Alan's scheme. Hash table of resolved
116 entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock.
119
120 In this case data path is free of exclusive locks at all.
121 */
122
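/*
 * A sketch of the resulting locking pattern (assembled from the code below,
 * not an additional rule): readers on the packet path take the rwlock
 * shared, configuration paths take it exclusive with BHs disabled, and the
 * unresolved queue always uses its own spinlock:
 *
 *	read_lock(&mrt_lock);			// data path lookup
 *	c = ipmr_cache_find(mrt, saddr, daddr);
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);		// process-context update
 *	list_add(&c->list, &mrt->mfc_cache_array[line]);
 *	write_unlock_bh(&mrt_lock);
 */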
123static struct kmem_cache *mrt_cachep __read_mostly;
124
125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
130 struct sk_buff *pkt, vifi_t vifi, int assert);
131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
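/*
 * Callers fill a struct flowi describing the packet and let the fib-rules
 * engine pick the table; an illustrative sketch, mirroring reg_vif_xmit()
 * further down in this file:
 *
 *	struct flowi fl = { .oif = dev->ifindex, .iif = skb->skb_iif,
 *			    .mark = skb->mark, };
 *	if (ipmr_fib_lookup(net, &fl, &mrt) < 0)
 *		goto drop;
 */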
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
189static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190{
191 return 1;
192}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
200{
201 return 0;
202}
203
204static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206{
207 return 1;
208}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 .family = RTNL_FAMILY_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271 list_del(&mrt->list);
272 kfree(mrt);
273 }
274 fib_rules_unregister(net->ipv4.mr_rules_ops);
275}
276#else
277#define ipmr_for_each_table(mrt, net) \
278 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
279
280static struct mr_table *ipmr_get_table(struct net *net, u32 id)
281{
282 return net->ipv4.mrt;
283}
284
285static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
286 struct mr_table **mrt)
287{
288 *mrt = net->ipv4.mrt;
289 return 0;
290}
291
292static int __net_init ipmr_rules_init(struct net *net)
293{
294 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295 return net->ipv4.mrt ? 0 : -ENOMEM;
296}
297
298static void __net_exit ipmr_rules_exit(struct net *net)
299{
300 kfree(net->ipv4.mrt);
301}
302#endif
303
304static struct mr_table *ipmr_new_table(struct net *net, u32 id)
305{
306 struct mr_table *mrt;
307 unsigned int i;
308
309 mrt = ipmr_get_table(net, id);
310 if (mrt != NULL)
311 return mrt;
312
313 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 if (mrt == NULL)
315 return NULL;
316 write_pnet(&mrt->net, net);
317 mrt->id = id;
318
319 /* Forwarding cache */
320 for (i = 0; i < MFC_LINES; i++)
321 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
322
323 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
324
325 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 (unsigned long)mrt);
327
328#ifdef CONFIG_IP_PIMSM
329 mrt->mroute_reg_vif_num = -1;
330#endif
331#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
332 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
333#endif
334 return mrt;
335}
336
337/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
338
339static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
340{
341 struct net *net = dev_net(dev);
342
343 dev_close(dev);
344
345 dev = __dev_get_by_name(net, "tunl0");
346 if (dev) {
347 const struct net_device_ops *ops = dev->netdev_ops;
348 struct ifreq ifr;
349 struct ip_tunnel_parm p;
350
351 memset(&p, 0, sizeof(p));
352 p.iph.daddr = v->vifc_rmt_addr.s_addr;
353 p.iph.saddr = v->vifc_lcl_addr.s_addr;
354 p.iph.version = 4;
355 p.iph.ihl = 5;
356 p.iph.protocol = IPPROTO_IPIP;
357 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
358 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
359
360 if (ops->ndo_do_ioctl) {
361 mm_segment_t oldfs = get_fs();
362
363 set_fs(KERNEL_DS);
364 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
365 set_fs(oldfs);
366 }
367 }
368}
369
370static
371struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
372{
373 struct net_device *dev;
374
375 dev = __dev_get_by_name(net, "tunl0");
376
377 if (dev) {
378 const struct net_device_ops *ops = dev->netdev_ops;
379 int err;
380 struct ifreq ifr;
381 struct ip_tunnel_parm p;
382 struct in_device *in_dev;
383
384 memset(&p, 0, sizeof(p));
385 p.iph.daddr = v->vifc_rmt_addr.s_addr;
386 p.iph.saddr = v->vifc_lcl_addr.s_addr;
387 p.iph.version = 4;
388 p.iph.ihl = 5;
389 p.iph.protocol = IPPROTO_IPIP;
390 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
391 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
392
393 if (ops->ndo_do_ioctl) {
394 mm_segment_t oldfs = get_fs();
395
396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs);
399 } else
400 err = -EOPNOTSUPP;
401
402 dev = NULL;
403
404 if (err == 0 &&
405 (dev = __dev_get_by_name(net, p.name)) != NULL) {
406 dev->flags |= IFF_MULTICAST;
407
408 in_dev = __in_dev_get_rtnl(dev);
409 if (in_dev == NULL)
410 goto failure;
411
412 ipv4_devconf_setall(in_dev);
413 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
414
415 if (dev_open(dev))
416 goto failure;
417 dev_hold(dev);
418 }
419 }
420 return dev;
421
422failure:
423 /* allow the register to be completed before unregistering. */
424 rtnl_unlock();
425 rtnl_lock();
426
427 unregister_netdevice(dev);
428 return NULL;
429}
430
431#ifdef CONFIG_IP_PIMSM
432
433static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
434{
435 struct net *net = dev_net(dev);
436 struct mr_table *mrt;
437 struct flowi fl = {
438 .oif = dev->ifindex,
439 .iif = skb->skb_iif,
440 .mark = skb->mark,
441 };
442 int err;
443
444 err = ipmr_fib_lookup(net, &fl, &mrt);
445 if (err < 0) {
446 kfree_skb(skb);
447 return err;
448 }
449
450 read_lock(&mrt_lock);
451 dev->stats.tx_bytes += skb->len;
452 dev->stats.tx_packets++;
453 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
454 read_unlock(&mrt_lock);
455 kfree_skb(skb);
456 return NETDEV_TX_OK;
457}
458
459static const struct net_device_ops reg_vif_netdev_ops = {
460 .ndo_start_xmit = reg_vif_xmit,
461};
462
463static void reg_vif_setup(struct net_device *dev)
464{
465 dev->type = ARPHRD_PIMREG;
466 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
467 dev->flags = IFF_NOARP;
468 dev->netdev_ops = &reg_vif_netdev_ops,
469 dev->destructor = free_netdev;
470 dev->features |= NETIF_F_NETNS_LOCAL;
471}
472
473static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
474{
475 struct net_device *dev;
476 struct in_device *in_dev;
477 char name[IFNAMSIZ];
478
479 if (mrt->id == RT_TABLE_DEFAULT)
480 sprintf(name, "pimreg");
481 else
482 sprintf(name, "pimreg%u", mrt->id);
483
484 dev = alloc_netdev(0, name, reg_vif_setup);
485
486 if (dev == NULL)
487 return NULL;
488
489 dev_net_set(dev, net);
490
491 if (register_netdevice(dev)) {
492 free_netdev(dev);
493 return NULL;
494 }
495 dev->iflink = 0;
496
497 rcu_read_lock();
498 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
499 rcu_read_unlock();
500 goto failure;
501 }
502
503 ipv4_devconf_setall(in_dev);
504 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
505 rcu_read_unlock();
506
507 if (dev_open(dev))
508 goto failure;
509
510 dev_hold(dev);
511
512 return dev;
513
514failure:
515 /* allow the register to be completed before unregistering. */
516 rtnl_unlock();
517 rtnl_lock();
518
519 unregister_netdevice(dev);
520 return NULL;
521}
522#endif
523
524/*
525 * Delete a VIF entry
7dc00c82 526 * @notify: Set to 1, if the caller is a notifier_call
1da177e4 527 */
e905a9ed 528
0c12295a 529static int vif_delete(struct mr_table *mrt, int vifi, int notify,
d17fa6fa 530 struct list_head *head)
1da177e4
LT
531{
532 struct vif_device *v;
533 struct net_device *dev;
534 struct in_device *in_dev;
535
0c12295a 536 if (vifi < 0 || vifi >= mrt->maxvif)
1da177e4
LT
537 return -EADDRNOTAVAIL;
538
0c12295a 539 v = &mrt->vif_table[vifi];
1da177e4
LT
540
541 write_lock_bh(&mrt_lock);
542 dev = v->dev;
543 v->dev = NULL;
544
545 if (!dev) {
546 write_unlock_bh(&mrt_lock);
547 return -EADDRNOTAVAIL;
548 }
549
550#ifdef CONFIG_IP_PIMSM
0c12295a
PM
551 if (vifi == mrt->mroute_reg_vif_num)
552 mrt->mroute_reg_vif_num = -1;
1da177e4
LT
553#endif
554
0c12295a 555 if (vifi+1 == mrt->maxvif) {
1da177e4
LT
556 int tmp;
557 for (tmp=vifi-1; tmp>=0; tmp--) {
0c12295a 558 if (VIF_EXISTS(mrt, tmp))
1da177e4
LT
559 break;
560 }
0c12295a 561 mrt->maxvif = tmp+1;
1da177e4
LT
562 }
563
564 write_unlock_bh(&mrt_lock);
565
566 dev_set_allmulti(dev, -1);
567
e5ed6399 568 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
42f811b8 569 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
1da177e4
LT
570 ip_rt_multicast_event(in_dev);
571 }
572
7dc00c82 573 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
d17fa6fa 574 unregister_netdevice_queue(dev, head);
1da177e4
LT
575
576 dev_put(dev);
577 return 0;
578}
579
5c0a66f5
BT
580static inline void ipmr_cache_free(struct mfc_cache *c)
581{
5c0a66f5
BT
582 kmem_cache_free(mrt_cachep, c);
583}
584
1da177e4
LT
585/* Destroy an unresolved cache entry, killing queued skbs
586 and reporting error to netlink readers.
587 */
588
0c12295a 589static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
1da177e4 590{
8de53dfb 591 struct net *net = read_pnet(&mrt->net);
1da177e4 592 struct sk_buff *skb;
9ef1d4c7 593 struct nlmsgerr *e;
1da177e4 594
0c12295a 595 atomic_dec(&mrt->cache_resolve_queue_len);
1da177e4 596
c354e124 597 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
eddc9ec5 598 if (ip_hdr(skb)->version == 0) {
1da177e4
LT
599 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
600 nlh->nlmsg_type = NLMSG_ERROR;
601 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
602 skb_trim(skb, nlh->nlmsg_len);
9ef1d4c7
PM
603 e = NLMSG_DATA(nlh);
604 e->error = -ETIMEDOUT;
605 memset(&e->msg, 0, sizeof(e->msg));
2942e900 606
4feb88e5 607 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1da177e4
LT
608 } else
609 kfree_skb(skb);
610 }
611
5c0a66f5 612 ipmr_cache_free(c);
1da177e4
LT
613}
614
615
e258beb2 616/* Timer process for the unresolved queue. */
1da177e4 617
e258beb2 618static void ipmr_expire_process(unsigned long arg)
1da177e4 619{
0c12295a 620 struct mr_table *mrt = (struct mr_table *)arg;
1da177e4
LT
621 unsigned long now;
622 unsigned long expires;
862465f2 623 struct mfc_cache *c, *next;
1da177e4
LT
624
625 if (!spin_trylock(&mfc_unres_lock)) {
0c12295a 626 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
1da177e4
LT
627 return;
628 }
629
0c12295a 630 if (list_empty(&mrt->mfc_unres_queue))
1da177e4
LT
631 goto out;
632
633 now = jiffies;
634 expires = 10*HZ;
1da177e4 635
0c12295a 636 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1da177e4
LT
637 if (time_after(c->mfc_un.unres.expires, now)) {
638 unsigned long interval = c->mfc_un.unres.expires - now;
639 if (interval < expires)
640 expires = interval;
1da177e4
LT
641 continue;
642 }
643
862465f2 644 list_del(&c->list);
0c12295a 645 ipmr_destroy_unres(mrt, c);
1da177e4
LT
646 }
647
0c12295a
PM
648 if (!list_empty(&mrt->mfc_unres_queue))
649 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
1da177e4
LT
650
651out:
652 spin_unlock(&mfc_unres_lock);
653}
654
655/* Fill oifs list. It is called under write locked mrt_lock. */
656
0c12295a 657static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
d658f8a0 658 unsigned char *ttls)
1da177e4
LT
659{
660 int vifi;
661
662 cache->mfc_un.res.minvif = MAXVIFS;
663 cache->mfc_un.res.maxvif = 0;
664 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
665
0c12295a
PM
666 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
667 if (VIF_EXISTS(mrt, vifi) &&
cf958ae3 668 ttls[vifi] && ttls[vifi] < 255) {
1da177e4
LT
669 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
670 if (cache->mfc_un.res.minvif > vifi)
671 cache->mfc_un.res.minvif = vifi;
672 if (cache->mfc_un.res.maxvif <= vifi)
673 cache->mfc_un.res.maxvif = vifi + 1;
674 }
675 }
676}
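/*
 * ttls[] comes from the daemon's mfcctl; 0 or 255 means "do not forward on
 * that vif". minvif/maxvif only bound the range scanned by ip_mr_forward().
 * Illustrative example: ttls = {0, 2, 0, 1} gives minvif = 1, maxvif = 4,
 * with forwarding possible only on vifs 1 and 3.
 */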
677
0c12295a
PM
678static int vif_add(struct net *net, struct mr_table *mrt,
679 struct vifctl *vifc, int mrtsock)
1da177e4
LT
680{
681 int vifi = vifc->vifc_vifi;
0c12295a 682 struct vif_device *v = &mrt->vif_table[vifi];
1da177e4
LT
683 struct net_device *dev;
684 struct in_device *in_dev;
d607032d 685 int err;
1da177e4
LT
686
687 /* Is vif busy ? */
0c12295a 688 if (VIF_EXISTS(mrt, vifi))
1da177e4
LT
689 return -EADDRINUSE;
690
691 switch (vifc->vifc_flags) {
692#ifdef CONFIG_IP_PIMSM
693 case VIFF_REGISTER:
694 /*
695 * Special Purpose VIF in PIM
696 * All the packets will be sent to the daemon
697 */
0c12295a 698 if (mrt->mroute_reg_vif_num >= 0)
1da177e4 699 return -EADDRINUSE;
f0ad0860 700 dev = ipmr_reg_vif(net, mrt);
1da177e4
LT
701 if (!dev)
702 return -ENOBUFS;
d607032d
WC
703 err = dev_set_allmulti(dev, 1);
704 if (err) {
705 unregister_netdevice(dev);
7dc00c82 706 dev_put(dev);
d607032d
WC
707 return err;
708 }
1da177e4
LT
709 break;
710#endif
e905a9ed 711 case VIFF_TUNNEL:
4feb88e5 712 dev = ipmr_new_tunnel(net, vifc);
1da177e4
LT
713 if (!dev)
714 return -ENOBUFS;
d607032d
WC
715 err = dev_set_allmulti(dev, 1);
716 if (err) {
717 ipmr_del_tunnel(dev, vifc);
7dc00c82 718 dev_put(dev);
d607032d
WC
719 return err;
720 }
1da177e4 721 break;
ee5e81f0
I
722
723 case VIFF_USE_IFINDEX:
1da177e4 724 case 0:
ee5e81f0
I
725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
727 if (dev && dev->ip_ptr == NULL) {
728 dev_put(dev);
729 return -EADDRNOTAVAIL;
730 }
731 } else
732 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
733
1da177e4
LT
734 if (!dev)
735 return -EADDRNOTAVAIL;
d607032d 736 err = dev_set_allmulti(dev, 1);
7dc00c82
WC
737 if (err) {
738 dev_put(dev);
d607032d 739 return err;
7dc00c82 740 }
1da177e4
LT
741 break;
742 default:
743 return -EINVAL;
744 }
745
d0490cfd
DC
746 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
747 dev_put(dev);
1da177e4 748 return -EADDRNOTAVAIL;
d0490cfd 749 }
42f811b8 750 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
1da177e4
LT
751 ip_rt_multicast_event(in_dev);
752
753 /*
754 * Fill in the VIF structures
755 */
c354e124
JK
756 v->rate_limit = vifc->vifc_rate_limit;
757 v->local = vifc->vifc_lcl_addr.s_addr;
758 v->remote = vifc->vifc_rmt_addr.s_addr;
759 v->flags = vifc->vifc_flags;
1da177e4
LT
760 if (!mrtsock)
761 v->flags |= VIFF_STATIC;
c354e124 762 v->threshold = vifc->vifc_threshold;
1da177e4
LT
763 v->bytes_in = 0;
764 v->bytes_out = 0;
765 v->pkt_in = 0;
766 v->pkt_out = 0;
767 v->link = dev->ifindex;
768 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
769 v->link = dev->iflink;
770
771 /* And finish update writing critical data */
772 write_lock_bh(&mrt_lock);
c354e124 773 v->dev = dev;
1da177e4
LT
774#ifdef CONFIG_IP_PIMSM
775 if (v->flags&VIFF_REGISTER)
0c12295a 776 mrt->mroute_reg_vif_num = vifi;
1da177e4 777#endif
0c12295a
PM
778 if (vifi+1 > mrt->maxvif)
779 mrt->maxvif = vifi+1;
1da177e4
LT
780 write_unlock_bh(&mrt_lock);
781 return 0;
782}
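/*
 * vif_add() is reached via setsockopt(MRT_ADD_VIF) from the routing daemon.
 * A hedged sketch of the matching userspace call (field values illustrative
 * only):
 *
 *	struct vifctl vc = { .vifc_vifi = 1, .vifc_threshold = 1,
 *			     .vifc_lcl_addr.s_addr = local_ip };
 *	setsockopt(igmp_sock, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 */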
783
0c12295a 784static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
4feb88e5
BT
785 __be32 origin,
786 __be32 mcastgrp)
1da177e4 787{
c354e124 788 int line = MFC_HASH(mcastgrp, origin);
1da177e4
LT
789 struct mfc_cache *c;
790
0c12295a 791 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
862465f2
PM
792 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
793 return c;
1da177e4 794 }
862465f2 795 return NULL;
1da177e4
LT
796}
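/*
 * Resolved entries live in MFC_LINES hash buckets keyed by
 * MFC_HASH(group, origin); lookups run under read_lock(&mrt_lock), as
 * ip_mr_input() does:
 *
 *	read_lock(&mrt_lock);
 *	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 *	read_unlock(&mrt_lock);
 */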
797
798/*
799 * Allocate a multicast cache entry
800 */
d658f8a0 801static struct mfc_cache *ipmr_cache_alloc(void)
1da177e4 802{
c354e124
JK
803 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
804 if (c == NULL)
1da177e4 805 return NULL;
1da177e4
LT
806 c->mfc_un.res.minvif = MAXVIFS;
807 return c;
808}
809
d658f8a0 810static struct mfc_cache *ipmr_cache_alloc_unres(void)
1da177e4 811{
c354e124
JK
812 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
813 if (c == NULL)
1da177e4 814 return NULL;
1da177e4
LT
815 skb_queue_head_init(&c->mfc_un.unres.unresolved);
816 c->mfc_un.unres.expires = jiffies + 10*HZ;
817 return c;
818}
819
820/*
821 * A cache entry has gone into a resolved state from queued
822 */
e905a9ed 823
0c12295a
PM
824static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
825 struct mfc_cache *uc, struct mfc_cache *c)
1da177e4
LT
826{
827 struct sk_buff *skb;
9ef1d4c7 828 struct nlmsgerr *e;
1da177e4
LT
829
830 /*
831 * Play the pending entries through our router
832 */
833
c354e124 834 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
eddc9ec5 835 if (ip_hdr(skb)->version == 0) {
1da177e4
LT
836 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
837
cb6a4e46 838 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
27a884dc
ACM
839 nlh->nlmsg_len = (skb_tail_pointer(skb) -
840 (u8 *)nlh);
1da177e4
LT
841 } else {
842 nlh->nlmsg_type = NLMSG_ERROR;
843 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
844 skb_trim(skb, nlh->nlmsg_len);
9ef1d4c7
PM
845 e = NLMSG_DATA(nlh);
846 e->error = -EMSGSIZE;
847 memset(&e->msg, 0, sizeof(e->msg));
1da177e4 848 }
2942e900 849
d658f8a0 850 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1da177e4 851 } else
0c12295a 852 ip_mr_forward(net, mrt, skb, c, 0);
1da177e4
LT
853 }
854}
855
856/*
857 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
858 * expects the following bizarre scheme.
859 *
860 * Called under mrt_lock.
861 */
e905a9ed 862
0c12295a 863static int ipmr_cache_report(struct mr_table *mrt,
4feb88e5 864 struct sk_buff *pkt, vifi_t vifi, int assert)
1da177e4
LT
865{
866 struct sk_buff *skb;
c9bdd4b5 867 const int ihl = ip_hdrlen(pkt);
1da177e4
LT
868 struct igmphdr *igmp;
869 struct igmpmsg *msg;
870 int ret;
871
872#ifdef CONFIG_IP_PIMSM
873 if (assert == IGMPMSG_WHOLEPKT)
874 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
875 else
876#endif
877 skb = alloc_skb(128, GFP_ATOMIC);
878
132adf54 879 if (!skb)
1da177e4
LT
880 return -ENOBUFS;
881
882#ifdef CONFIG_IP_PIMSM
883 if (assert == IGMPMSG_WHOLEPKT) {
884 /* Ugly, but we have no choice with this interface.
885 Duplicate old header, fix ihl, length etc.
886 And all this only to mangle msg->im_msgtype and
887 to set msg->im_mbz to "mbz" :-)
888 */
878c8145
ACM
889 skb_push(skb, sizeof(struct iphdr));
890 skb_reset_network_header(skb);
badff6d0 891 skb_reset_transport_header(skb);
0272ffc4 892 msg = (struct igmpmsg *)skb_network_header(skb);
d56f90a7 893 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
1da177e4
LT
894 msg->im_msgtype = IGMPMSG_WHOLEPKT;
895 msg->im_mbz = 0;
0c12295a 896 msg->im_vif = mrt->mroute_reg_vif_num;
eddc9ec5
ACM
897 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
898 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
899 sizeof(struct iphdr));
e905a9ed 900 } else
1da177e4 901#endif
e905a9ed
YH
902 {
903
1da177e4
LT
904 /*
905 * Copy the IP header
906 */
907
27a884dc 908 skb->network_header = skb->tail;
ddc7b8e3 909 skb_put(skb, ihl);
27d7ff46 910 skb_copy_to_linear_data(skb, pkt->data, ihl);
eddc9ec5
ACM
911 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
912 msg = (struct igmpmsg *)skb_network_header(skb);
1da177e4 913 msg->im_vif = vifi;
adf30907 914 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1da177e4
LT
915
916 /*
917 * Add our header
918 */
919
c354e124 920 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
1da177e4
LT
921 igmp->type =
922 msg->im_msgtype = assert;
923 igmp->code = 0;
eddc9ec5 924 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
b0e380b1 925 skb->transport_header = skb->network_header;
e905a9ed 926 }
1da177e4 927
0c12295a 928 if (mrt->mroute_sk == NULL) {
1da177e4
LT
929 kfree_skb(skb);
930 return -EINVAL;
931 }
932
933 /*
934 * Deliver to mrouted
935 */
0c12295a 936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
70a269e6 937 if (ret < 0) {
1da177e4
LT
938 if (net_ratelimit())
939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
940 kfree_skb(skb);
941 }
942
943 return ret;
944}
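/*
 * The upcall is delivered as a struct igmpmsg on the daemon's raw IGMP
 * socket. For IGMPMSG_NOCACHE the payload is the original IP header with
 * im_msgtype/im_vif filled in and the protocol field forced to 0; for
 * IGMPMSG_WHOLEPKT the whole packet follows a duplicated header. The daemon
 * is expected to respond by installing a route with setsockopt(MRT_ADD_MFC).
 */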
945
946/*
947 * Queue a packet for resolution. It gets locked cache entry!
948 */
e905a9ed 949
1da177e4 950static int
0c12295a 951ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1da177e4 952{
862465f2 953 bool found = false;
1da177e4
LT
954 int err;
955 struct mfc_cache *c;
eddc9ec5 956 const struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
957
958 spin_lock_bh(&mfc_unres_lock);
0c12295a 959 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
e258beb2 960 if (c->mfc_mcastgrp == iph->daddr &&
862465f2
PM
961 c->mfc_origin == iph->saddr) {
962 found = true;
1da177e4 963 break;
862465f2 964 }
1da177e4
LT
965 }
966
862465f2 967 if (!found) {
1da177e4
LT
968 /*
969 * Create a new entry if allowable
970 */
971
0c12295a 972 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
d658f8a0 973 (c = ipmr_cache_alloc_unres()) == NULL) {
1da177e4
LT
974 spin_unlock_bh(&mfc_unres_lock);
975
976 kfree_skb(skb);
977 return -ENOBUFS;
978 }
979
980 /*
981 * Fill in the new cache entry
982 */
eddc9ec5
ACM
983 c->mfc_parent = -1;
984 c->mfc_origin = iph->saddr;
985 c->mfc_mcastgrp = iph->daddr;
1da177e4
LT
986
987 /*
988 * Reflect first query at mrouted.
989 */
0c12295a 990 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
4feb88e5 991 if (err < 0) {
e905a9ed 992 /* If the report failed throw the cache entry
1da177e4
LT
993 out - Brad Parker
994 */
995 spin_unlock_bh(&mfc_unres_lock);
996
5c0a66f5 997 ipmr_cache_free(c);
1da177e4
LT
998 kfree_skb(skb);
999 return err;
1000 }
1001
0c12295a
PM
1002 atomic_inc(&mrt->cache_resolve_queue_len);
1003 list_add(&c->list, &mrt->mfc_unres_queue);
1da177e4 1004
278554bd
DM
1005 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1006 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1da177e4
LT
1007 }
1008
1009 /*
1010 * See if we can append the packet
1011 */
1012 if (c->mfc_un.unres.unresolved.qlen>3) {
1013 kfree_skb(skb);
1014 err = -ENOBUFS;
1015 } else {
c354e124 1016 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1da177e4
LT
1017 err = 0;
1018 }
1019
1020 spin_unlock_bh(&mfc_unres_lock);
1021 return err;
1022}
1023
1024/*
1025 * MFC cache manipulation by user space mroute daemon
1026 */
1027
0c12295a 1028static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1da177e4
LT
1029{
1030 int line;
862465f2 1031 struct mfc_cache *c, *next;
1da177e4 1032
c354e124 1033 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 1034
0c12295a 1035 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1da177e4
LT
1036 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1037 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1038 write_lock_bh(&mrt_lock);
862465f2 1039 list_del(&c->list);
1da177e4
LT
1040 write_unlock_bh(&mrt_lock);
1041
5c0a66f5 1042 ipmr_cache_free(c);
1da177e4
LT
1043 return 0;
1044 }
1045 }
1046 return -ENOENT;
1047}
1048
0c12295a
PM
1049static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1050 struct mfcctl *mfc, int mrtsock)
1da177e4 1051{
862465f2 1052 bool found = false;
1da177e4 1053 int line;
862465f2 1054 struct mfc_cache *uc, *c;
1da177e4 1055
a50436f2
PM
1056 if (mfc->mfcc_parent >= MAXVIFS)
1057 return -ENFILE;
1058
c354e124 1059 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 1060
0c12295a 1061 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1da177e4 1062 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
862465f2
PM
1063 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1064 found = true;
1da177e4 1065 break;
862465f2 1066 }
1da177e4
LT
1067 }
1068
862465f2 1069 if (found) {
1da177e4
LT
1070 write_lock_bh(&mrt_lock);
1071 c->mfc_parent = mfc->mfcc_parent;
0c12295a 1072 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1da177e4
LT
1073 if (!mrtsock)
1074 c->mfc_flags |= MFC_STATIC;
1075 write_unlock_bh(&mrt_lock);
1076 return 0;
1077 }
1078
f97c1e0c 1079 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1da177e4
LT
1080 return -EINVAL;
1081
d658f8a0 1082 c = ipmr_cache_alloc();
c354e124 1083 if (c == NULL)
1da177e4
LT
1084 return -ENOMEM;
1085
c354e124
JK
1086 c->mfc_origin = mfc->mfcc_origin.s_addr;
1087 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1088 c->mfc_parent = mfc->mfcc_parent;
0c12295a 1089 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1da177e4
LT
1090 if (!mrtsock)
1091 c->mfc_flags |= MFC_STATIC;
1092
1093 write_lock_bh(&mrt_lock);
0c12295a 1094 list_add(&c->list, &mrt->mfc_cache_array[line]);
1da177e4
LT
1095 write_unlock_bh(&mrt_lock);
1096
1097 /*
1098 * Check to see if we resolved a queued list. If so we
1099 * need to send on the frames and tidy up.
1100 */
b0ebb739 1101 found = false;
1da177e4 1102 spin_lock_bh(&mfc_unres_lock);
0c12295a 1103 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
e258beb2 1104 if (uc->mfc_origin == c->mfc_origin &&
1da177e4 1105 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
862465f2 1106 list_del(&uc->list);
0c12295a 1107 atomic_dec(&mrt->cache_resolve_queue_len);
b0ebb739 1108 found = true;
1da177e4
LT
1109 break;
1110 }
1111 }
0c12295a
PM
1112 if (list_empty(&mrt->mfc_unres_queue))
1113 del_timer(&mrt->ipmr_expire_timer);
1da177e4
LT
1114 spin_unlock_bh(&mfc_unres_lock);
1115
b0ebb739 1116 if (found) {
0c12295a 1117 ipmr_cache_resolve(net, mrt, uc, c);
5c0a66f5 1118 ipmr_cache_free(uc);
1da177e4
LT
1119 }
1120 return 0;
1121}
1122
1123/*
1124 * Close the multicast socket, and clear the vif tables etc
1125 */
e905a9ed 1126
0c12295a 1127static void mroute_clean_tables(struct mr_table *mrt)
1da177e4
LT
1128{
1129 int i;
d17fa6fa 1130 LIST_HEAD(list);
862465f2 1131 struct mfc_cache *c, *next;
e905a9ed 1132
1da177e4
LT
1133 /*
1134 * Shut down all active vif entries
1135 */
0c12295a
PM
1136 for (i = 0; i < mrt->maxvif; i++) {
1137 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1138 vif_delete(mrt, i, 0, &list);
1da177e4 1139 }
d17fa6fa 1140 unregister_netdevice_many(&list);
1da177e4
LT
1141
1142 /*
1143 * Wipe the cache
1144 */
862465f2 1145 for (i = 0; i < MFC_LINES; i++) {
0c12295a 1146 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
862465f2 1147 if (c->mfc_flags&MFC_STATIC)
1da177e4 1148 continue;
1da177e4 1149 write_lock_bh(&mrt_lock);
862465f2 1150 list_del(&c->list);
1da177e4
LT
1151 write_unlock_bh(&mrt_lock);
1152
5c0a66f5 1153 ipmr_cache_free(c);
1da177e4
LT
1154 }
1155 }
1156
0c12295a 1157 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1da177e4 1158 spin_lock_bh(&mfc_unres_lock);
0c12295a 1159 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
862465f2 1160 list_del(&c->list);
0c12295a 1161 ipmr_destroy_unres(mrt, c);
1da177e4
LT
1162 }
1163 spin_unlock_bh(&mfc_unres_lock);
1164 }
1165}
1166
1167static void mrtsock_destruct(struct sock *sk)
1168{
4feb88e5 1169 struct net *net = sock_net(sk);
f0ad0860 1170 struct mr_table *mrt;
4feb88e5 1171
1da177e4 1172 rtnl_lock();
f0ad0860
PM
1173 ipmr_for_each_table(mrt, net) {
1174 if (sk == mrt->mroute_sk) {
1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1da177e4 1176
f0ad0860
PM
1177 write_lock_bh(&mrt_lock);
1178 mrt->mroute_sk = NULL;
1179 write_unlock_bh(&mrt_lock);
1da177e4 1180
f0ad0860
PM
1181 mroute_clean_tables(mrt);
1182 }
1da177e4
LT
1183 }
1184 rtnl_unlock();
1185}
1186
1187/*
1188 * Socket options and virtual interface manipulation. The whole
1189 * virtual interface system is a complete heap, but unfortunately
1190 * that's how BSD mrouted happens to think. Maybe one day with a proper
1191 * MOSPF/PIM router set up we can clean this up.
1192 */
e905a9ed 1193
b7058842 1194int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1da177e4
LT
1195{
1196 int ret;
1197 struct vifctl vif;
1198 struct mfcctl mfc;
4feb88e5 1199 struct net *net = sock_net(sk);
f0ad0860
PM
1200 struct mr_table *mrt;
1201
1202 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1203 if (mrt == NULL)
1204 return -ENOENT;
e905a9ed 1205
132adf54 1206 if (optname != MRT_INIT) {
0c12295a 1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1da177e4
LT
1208 return -EACCES;
1209 }
1210
132adf54
SH
1211 switch (optname) {
1212 case MRT_INIT:
1213 if (sk->sk_type != SOCK_RAW ||
c720c7e8 1214 inet_sk(sk)->inet_num != IPPROTO_IGMP)
132adf54 1215 return -EOPNOTSUPP;
c354e124 1216 if (optlen != sizeof(int))
132adf54 1217 return -ENOPROTOOPT;
1da177e4 1218
132adf54 1219 rtnl_lock();
0c12295a 1220 if (mrt->mroute_sk) {
1da177e4 1221 rtnl_unlock();
132adf54
SH
1222 return -EADDRINUSE;
1223 }
1224
1225 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226 if (ret == 0) {
1227 write_lock_bh(&mrt_lock);
0c12295a 1228 mrt->mroute_sk = sk;
132adf54
SH
1229 write_unlock_bh(&mrt_lock);
1230
4feb88e5 1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
132adf54
SH
1232 }
1233 rtnl_unlock();
1234 return ret;
1235 case MRT_DONE:
0c12295a 1236 if (sk != mrt->mroute_sk)
132adf54
SH
1237 return -EACCES;
1238 return ip_ra_control(sk, 0, NULL);
1239 case MRT_ADD_VIF:
1240 case MRT_DEL_VIF:
c354e124 1241 if (optlen != sizeof(vif))
132adf54 1242 return -EINVAL;
c354e124 1243 if (copy_from_user(&vif, optval, sizeof(vif)))
132adf54
SH
1244 return -EFAULT;
1245 if (vif.vifc_vifi >= MAXVIFS)
1246 return -ENFILE;
1247 rtnl_lock();
c354e124 1248 if (optname == MRT_ADD_VIF) {
0c12295a 1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
132adf54 1250 } else {
0c12295a 1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
132adf54
SH
1252 }
1253 rtnl_unlock();
1254 return ret;
1da177e4
LT
1255
1256 /*
1257 * Manipulate the forwarding caches. These live
1258 * in a sort of kernel/user symbiosis.
1259 */
132adf54
SH
1260 case MRT_ADD_MFC:
1261 case MRT_DEL_MFC:
c354e124 1262 if (optlen != sizeof(mfc))
132adf54 1263 return -EINVAL;
c354e124 1264 if (copy_from_user(&mfc, optval, sizeof(mfc)))
132adf54
SH
1265 return -EFAULT;
1266 rtnl_lock();
c354e124 1267 if (optname == MRT_DEL_MFC)
0c12295a 1268 ret = ipmr_mfc_delete(mrt, &mfc);
132adf54 1269 else
0c12295a 1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
132adf54
SH
1271 rtnl_unlock();
1272 return ret;
1da177e4
LT
1273 /*
1274 * Control PIM assert.
1275 */
132adf54
SH
1276 case MRT_ASSERT:
1277 {
1278 int v;
1279 if (get_user(v,(int __user *)optval))
1280 return -EFAULT;
0c12295a 1281 mrt->mroute_do_assert = (v) ? 1 : 0;
132adf54
SH
1282 return 0;
1283 }
1da177e4 1284#ifdef CONFIG_IP_PIMSM
132adf54
SH
1285 case MRT_PIM:
1286 {
ba93ef74
SH
1287 int v;
1288
132adf54
SH
1289 if (get_user(v,(int __user *)optval))
1290 return -EFAULT;
ba93ef74
SH
1291 v = (v) ? 1 : 0;
1292
132adf54
SH
1293 rtnl_lock();
1294 ret = 0;
0c12295a
PM
1295 if (v != mrt->mroute_do_pim) {
1296 mrt->mroute_do_pim = v;
1297 mrt->mroute_do_assert = v;
1da177e4 1298 }
132adf54
SH
1299 rtnl_unlock();
1300 return ret;
1301 }
f0ad0860
PM
1302#endif
1303#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1304 case MRT_TABLE:
1305 {
1306 u32 v;
1307
1308 if (optlen != sizeof(u32))
1309 return -EINVAL;
1310 if (get_user(v, (u32 __user *)optval))
1311 return -EFAULT;
1312 if (sk == mrt->mroute_sk)
1313 return -EBUSY;
1314
1315 rtnl_lock();
1316 ret = 0;
1317 if (!ipmr_new_table(net, v))
1318 ret = -ENOMEM;
1319 raw_sk(sk)->ipmr_table = v;
1320 rtnl_unlock();
1321 return ret;
1322 }
1da177e4 1323#endif
132adf54
SH
1324 /*
1325 * Spurious command, or MRT_VERSION which you cannot
1326 * set.
1327 */
1328 default:
1329 return -ENOPROTOOPT;
1da177e4
LT
1330 }
1331}
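/*
 * The expected daemon handshake, as a hedged userspace sketch (error
 * handling omitted):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	// ... add vifs / MFC entries, read struct igmpmsg upcalls from s ...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */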
1332
1333/*
1334 * Getsock opt support for the multicast routing system.
1335 */
e905a9ed 1336
c354e124 1337int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1da177e4
LT
1338{
1339 int olr;
1340 int val;
4feb88e5 1341 struct net *net = sock_net(sk);
f0ad0860
PM
1342 struct mr_table *mrt;
1343
1344 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1345 if (mrt == NULL)
1346 return -ENOENT;
1da177e4 1347
c354e124 1348 if (optname != MRT_VERSION &&
1da177e4
LT
1349#ifdef CONFIG_IP_PIMSM
1350 optname!=MRT_PIM &&
1351#endif
1352 optname!=MRT_ASSERT)
1353 return -ENOPROTOOPT;
1354
1355 if (get_user(olr, optlen))
1356 return -EFAULT;
1357
1358 olr = min_t(unsigned int, olr, sizeof(int));
1359 if (olr < 0)
1360 return -EINVAL;
e905a9ed 1361
c354e124 1362 if (put_user(olr, optlen))
1da177e4 1363 return -EFAULT;
c354e124
JK
1364 if (optname == MRT_VERSION)
1365 val = 0x0305;
1da177e4 1366#ifdef CONFIG_IP_PIMSM
c354e124 1367 else if (optname == MRT_PIM)
0c12295a 1368 val = mrt->mroute_do_pim;
1da177e4
LT
1369#endif
1370 else
0c12295a 1371 val = mrt->mroute_do_assert;
c354e124 1372 if (copy_to_user(optval, &val, olr))
1da177e4
LT
1373 return -EFAULT;
1374 return 0;
1375}
1376
1377/*
1378 * The IP multicast ioctl support routines.
1379 */
e905a9ed 1380
1da177e4
LT
1381int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1382{
1383 struct sioc_sg_req sr;
1384 struct sioc_vif_req vr;
1385 struct vif_device *vif;
1386 struct mfc_cache *c;
4feb88e5 1387 struct net *net = sock_net(sk);
f0ad0860
PM
1388 struct mr_table *mrt;
1389
1390 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1391 if (mrt == NULL)
1392 return -ENOENT;
e905a9ed 1393
132adf54
SH
1394 switch (cmd) {
1395 case SIOCGETVIFCNT:
c354e124 1396 if (copy_from_user(&vr, arg, sizeof(vr)))
132adf54 1397 return -EFAULT;
0c12295a 1398 if (vr.vifi >= mrt->maxvif)
132adf54
SH
1399 return -EINVAL;
1400 read_lock(&mrt_lock);
0c12295a
PM
1401 vif = &mrt->vif_table[vr.vifi];
1402 if (VIF_EXISTS(mrt, vr.vifi)) {
c354e124
JK
1403 vr.icount = vif->pkt_in;
1404 vr.ocount = vif->pkt_out;
1405 vr.ibytes = vif->bytes_in;
1406 vr.obytes = vif->bytes_out;
1da177e4 1407 read_unlock(&mrt_lock);
1da177e4 1408
c354e124 1409 if (copy_to_user(arg, &vr, sizeof(vr)))
132adf54
SH
1410 return -EFAULT;
1411 return 0;
1412 }
1413 read_unlock(&mrt_lock);
1414 return -EADDRNOTAVAIL;
1415 case SIOCGETSGCNT:
c354e124 1416 if (copy_from_user(&sr, arg, sizeof(sr)))
132adf54
SH
1417 return -EFAULT;
1418
1419 read_lock(&mrt_lock);
0c12295a 1420 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
132adf54
SH
1421 if (c) {
1422 sr.pktcnt = c->mfc_un.res.pkt;
1423 sr.bytecnt = c->mfc_un.res.bytes;
1424 sr.wrong_if = c->mfc_un.res.wrong_if;
1da177e4 1425 read_unlock(&mrt_lock);
132adf54 1426
c354e124 1427 if (copy_to_user(arg, &sr, sizeof(sr)))
132adf54
SH
1428 return -EFAULT;
1429 return 0;
1430 }
1431 read_unlock(&mrt_lock);
1432 return -EADDRNOTAVAIL;
1433 default:
1434 return -ENOIOCTLCMD;
1da177e4
LT
1435 }
1436}
1437
1438
1439static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1440{
e9dc8653 1441 struct net_device *dev = ptr;
4feb88e5 1442 struct net *net = dev_net(dev);
f0ad0860 1443 struct mr_table *mrt;
1da177e4
LT
1444 struct vif_device *v;
1445 int ct;
d17fa6fa 1446 LIST_HEAD(list);
e9dc8653 1447
1da177e4
LT
1448 if (event != NETDEV_UNREGISTER)
1449 return NOTIFY_DONE;
f0ad0860
PM
1450
1451 ipmr_for_each_table(mrt, net) {
1452 v = &mrt->vif_table[0];
1453 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1454 if (v->dev == dev)
1455 vif_delete(mrt, ct, 1, &list);
1456 }
1da177e4 1457 }
d17fa6fa 1458 unregister_netdevice_many(&list);
1da177e4
LT
1459 return NOTIFY_DONE;
1460}
1461
1462
c354e124 1463static struct notifier_block ip_mr_notifier = {
1da177e4
LT
1464 .notifier_call = ipmr_device_event,
1465};
1466
1467/*
1468 * Encapsulate a packet by attaching a valid IPIP header to it.
1469 * This avoids tunnel drivers and other mess and gives us the speed so
1470 * important for multicast video.
1471 */
e905a9ed 1472
114c7844 1473static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1da177e4 1474{
8856dfa3 1475 struct iphdr *iph;
eddc9ec5 1476 struct iphdr *old_iph = ip_hdr(skb);
8856dfa3
ACM
1477
1478 skb_push(skb, sizeof(struct iphdr));
b0e380b1 1479 skb->transport_header = skb->network_header;
8856dfa3 1480 skb_reset_network_header(skb);
eddc9ec5 1481 iph = ip_hdr(skb);
1da177e4
LT
1482
1483 iph->version = 4;
e023dd64
ACM
1484 iph->tos = old_iph->tos;
1485 iph->ttl = old_iph->ttl;
1da177e4
LT
1486 iph->frag_off = 0;
1487 iph->daddr = daddr;
1488 iph->saddr = saddr;
1489 iph->protocol = IPPROTO_IPIP;
1490 iph->ihl = 5;
1491 iph->tot_len = htons(skb->len);
adf30907 1492 ip_select_ident(iph, skb_dst(skb), NULL);
1da177e4
LT
1493 ip_send_check(iph);
1494
1da177e4
LT
1495 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1496 nf_reset(skb);
1497}
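/*
 * After ip_encap() the skb carries a minimal outer IPIP header (protocol
 * IPPROTO_IPIP, ihl 5, no options) whose saddr/daddr are the vif's tunnel
 * endpoints, i.e. the on-wire layout becomes:
 *
 *	[ outer iphdr, proto IPPROTO_IPIP ][ original multicast iphdr ][ data ]
 */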
1498
1499static inline int ipmr_forward_finish(struct sk_buff *skb)
1500{
1501 struct ip_options * opt = &(IPCB(skb)->opt);
1502
adf30907 1503 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1da177e4
LT
1504
1505 if (unlikely(opt->optlen))
1506 ip_forward_options(skb);
1507
1508 return dst_output(skb);
1509}
1510
1511/*
1512 * Processing handlers for ipmr_forward
1513 */
1514
0c12295a
PM
1515static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1516 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1da177e4 1517{
eddc9ec5 1518 const struct iphdr *iph = ip_hdr(skb);
0c12295a 1519 struct vif_device *vif = &mrt->vif_table[vifi];
1da177e4
LT
1520 struct net_device *dev;
1521 struct rtable *rt;
1522 int encap = 0;
1523
1524 if (vif->dev == NULL)
1525 goto out_free;
1526
1527#ifdef CONFIG_IP_PIMSM
1528 if (vif->flags & VIFF_REGISTER) {
1529 vif->pkt_out++;
c354e124 1530 vif->bytes_out += skb->len;
cf3677ae
PE
1531 vif->dev->stats.tx_bytes += skb->len;
1532 vif->dev->stats.tx_packets++;
0c12295a 1533 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
69ebbf58 1534 goto out_free;
1da177e4
LT
1535 }
1536#endif
1537
1538 if (vif->flags&VIFF_TUNNEL) {
1539 struct flowi fl = { .oif = vif->link,
1540 .nl_u = { .ip4_u =
1541 { .daddr = vif->remote,
1542 .saddr = vif->local,
1543 .tos = RT_TOS(iph->tos) } },
1544 .proto = IPPROTO_IPIP };
4feb88e5 1545 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1546 goto out_free;
1547 encap = sizeof(struct iphdr);
1548 } else {
1549 struct flowi fl = { .oif = vif->link,
1550 .nl_u = { .ip4_u =
1551 { .daddr = iph->daddr,
1552 .tos = RT_TOS(iph->tos) } },
1553 .proto = IPPROTO_IPIP };
4feb88e5 1554 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1555 goto out_free;
1556 }
1557
d8d1f30b 1558 dev = rt->dst.dev;
1da177e4 1559
d8d1f30b 1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1da177e4
LT
1561 /* Do not fragment multicasts. Alas, IPv4 does not
1562 allow to send ICMP, so that packets will disappear
1563 to blackhole.
1564 */
1565
7c73a6fa 1566 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
1567 ip_rt_put(rt);
1568 goto out_free;
1569 }
1570
d8d1f30b 1571 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1da177e4
LT
1572
1573 if (skb_cow(skb, encap)) {
e905a9ed 1574 ip_rt_put(rt);
1da177e4
LT
1575 goto out_free;
1576 }
1577
1578 vif->pkt_out++;
c354e124 1579 vif->bytes_out += skb->len;
1da177e4 1580
adf30907 1581 skb_dst_drop(skb);
d8d1f30b 1582 skb_dst_set(skb, &rt->dst);
eddc9ec5 1583 ip_decrease_ttl(ip_hdr(skb));
1da177e4
LT
1584
1585 /* FIXME: forward and output firewalls used to be called here.
1586 * What do we do with netfilter? -- RR */
1587 if (vif->flags & VIFF_TUNNEL) {
1588 ip_encap(skb, vif->local, vif->remote);
1589 /* FIXME: extra output firewall step used to be here. --RR */
2f4c02d4
PE
1590 vif->dev->stats.tx_packets++;
1591 vif->dev->stats.tx_bytes += skb->len;
1da177e4
LT
1592 }
1593
1594 IPCB(skb)->flags |= IPSKB_FORWARDED;
1595
1596 /*
1597 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1598 * not only before forwarding, but after forwarding on all output
1599 * interfaces. It is clear, if mrouter runs a multicasting
1600 * program, it should receive packets not depending to what interface
1601 * program is joined.
1602 * If we will not make it, the program will have to join on all
1603 * interfaces. On the other hand, multihoming host (or router, but
1604 * not mrouter) cannot join to more than one interface - it will
1605 * result in receiving multiple packets.
1606 */
9bbc768a 1607 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1da177e4
LT
1608 ipmr_forward_finish);
1609 return;
1610
1611out_free:
1612 kfree_skb(skb);
1da177e4
LT
1613}
1614
0c12295a 1615static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1da177e4
LT
1616{
1617 int ct;
0c12295a
PM
1618
1619 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1620 if (mrt->vif_table[ct].dev == dev)
1da177e4
LT
1621 break;
1622 }
1623 return ct;
1624}
1625
1626/* "local" means that we should preserve one skb (for local delivery) */
1627
0c12295a
PM
1628static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1629 struct sk_buff *skb, struct mfc_cache *cache,
1630 int local)
1da177e4
LT
1631{
1632 int psend = -1;
1633 int vif, ct;
1634
1635 vif = cache->mfc_parent;
1636 cache->mfc_un.res.pkt++;
1637 cache->mfc_un.res.bytes += skb->len;
1638
1639 /*
1640 * Wrong interface: drop packet and (maybe) send PIM assert.
1641 */
0c12295a 1642 if (mrt->vif_table[vif].dev != skb->dev) {
1da177e4
LT
1643 int true_vifi;
1644
511c3f92 1645 if (skb_rtable(skb)->fl.iif == 0) {
1da177e4
LT
1646 /* It is our own packet, looped back.
1647 Very complicated situation...
1648
1649 The best workaround until routing daemons will be
1650 fixed is not to redistribute packet, if it was
1651 send through wrong interface. It means, that
1652 multicast applications WILL NOT work for
1653 (S,G), which have default multicast route pointing
1654 to wrong oif. In any case, it is not a good
1655 idea to use multicasting applications on router.
1656 */
1657 goto dont_forward;
1658 }
1659
1660 cache->mfc_un.res.wrong_if++;
0c12295a 1661 true_vifi = ipmr_find_vif(mrt, skb->dev);
1da177e4 1662
0c12295a 1663 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1da177e4
LT
1664 /* pimsm uses asserts, when switching from RPT to SPT,
1665 so that we cannot check that packet arrived on an oif.
1666 It is bad, but otherwise we would need to move pretty
1667 large chunk of pimd to kernel. Ough... --ANK
1668 */
0c12295a 1669 (mrt->mroute_do_pim ||
6f9374a9 1670 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 1671 time_after(jiffies,
1da177e4
LT
1672 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1673 cache->mfc_un.res.last_assert = jiffies;
0c12295a 1674 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
1675 }
1676 goto dont_forward;
1677 }
1678
0c12295a
PM
1679 mrt->vif_table[vif].pkt_in++;
1680 mrt->vif_table[vif].bytes_in += skb->len;
1da177e4
LT
1681
1682 /*
1683 * Forward the frame
1684 */
1685 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
eddc9ec5 1686 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
1687 if (psend != -1) {
1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1689 if (skb2)
0c12295a
PM
1690 ipmr_queue_xmit(net, mrt, skb2, cache,
1691 psend);
1da177e4 1692 }
c354e124 1693 psend = ct;
1da177e4
LT
1694 }
1695 }
1696 if (psend != -1) {
1697 if (local) {
1698 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1699 if (skb2)
0c12295a 1700 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1da177e4 1701 } else {
0c12295a 1702 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1da177e4
LT
1703 return 0;
1704 }
1705 }
1706
1707dont_forward:
1708 if (!local)
1709 kfree_skb(skb);
1710 return 0;
1711}
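/*
 * Forwarding is gated per-vif by the TTL threshold: a copy goes out on vif
 * ct only when ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]. For example,
 * a packet arriving with ttl 3 and ttls[2] = 2, ttls[5] = 4 is transmitted
 * on vif 2 but not on vif 5.
 */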
1712
1713
1714/*
1715 * Multicast packets for forwarding arrive here
1716 */
1717
1718int ip_mr_input(struct sk_buff *skb)
1719{
1720 struct mfc_cache *cache;
4feb88e5 1721 struct net *net = dev_net(skb->dev);
511c3f92 1722 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
f0ad0860
PM
1723 struct mr_table *mrt;
1724 int err;
1da177e4
LT
1725
1726 /* Packet is looped back after forward, it should not be
1727 forwarded second time, but still can be delivered locally.
1728 */
1729 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1730 goto dont_forward;
1731
f0ad0860 1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
e40dbc51
BG
1733 if (err < 0) {
1734 kfree_skb(skb);
f0ad0860 1735 return err;
e40dbc51 1736 }
f0ad0860 1737
1da177e4
LT
1738 if (!local) {
1739 if (IPCB(skb)->opt.router_alert) {
1740 if (ip_call_ra_chain(skb))
1741 return 0;
eddc9ec5 1742 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1da177e4
LT
1743 /* IGMPv1 (and broken IGMPv2 implementations sort of
1744 Cisco IOS <= 11.2(8)) do not put router alert
1745 option to IGMP packets destined to routable
1746 groups. It is very bad, because it means
1747 that we can forward NO IGMP messages.
1748 */
1749 read_lock(&mrt_lock);
0c12295a 1750 if (mrt->mroute_sk) {
2715bcf9 1751 nf_reset(skb);
0c12295a 1752 raw_rcv(mrt->mroute_sk, skb);
1da177e4
LT
1753 read_unlock(&mrt_lock);
1754 return 0;
1755 }
1756 read_unlock(&mrt_lock);
1757 }
1758 }
1759
1760 read_lock(&mrt_lock);
0c12295a 1761 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1da177e4
LT
1762
1763 /*
1764 * No usable cache entry
1765 */
c354e124 1766 if (cache == NULL) {
1da177e4
LT
1767 int vif;
1768
1769 if (local) {
1770 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1771 ip_local_deliver(skb);
1772 if (skb2 == NULL) {
1773 read_unlock(&mrt_lock);
1774 return -ENOBUFS;
1775 }
1776 skb = skb2;
1777 }
1778
0c12295a 1779 vif = ipmr_find_vif(mrt, skb->dev);
1da177e4 1780 if (vif >= 0) {
0eae88f3 1781 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1da177e4
LT
1782 read_unlock(&mrt_lock);
1783
0eae88f3 1784 return err2;
1da177e4
LT
1785 }
1786 read_unlock(&mrt_lock);
1787 kfree_skb(skb);
1788 return -ENODEV;
1789 }
1790
0c12295a 1791 ip_mr_forward(net, mrt, skb, cache, local);
1da177e4
LT
1792
1793 read_unlock(&mrt_lock);
1794
1795 if (local)
1796 return ip_local_deliver(skb);
1797
1798 return 0;
1799
1800dont_forward:
1801 if (local)
1802 return ip_local_deliver(skb);
1803 kfree_skb(skb);
1804 return 0;
1805}
1806
b1879204 1807#ifdef CONFIG_IP_PIMSM
f0ad0860
PM
1808static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1809 unsigned int pimlen)
1da177e4 1810{
b1879204
IJ
1811 struct net_device *reg_dev = NULL;
1812 struct iphdr *encap;
1da177e4 1813
b1879204 1814 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1da177e4
LT
1815 /*
1816 Check that:
1817 a. packet is really destinted to a multicast group
1818 b. packet is not a NULL-REGISTER
1819 c. packet is not truncated
1820 */
f97c1e0c 1821 if (!ipv4_is_multicast(encap->daddr) ||
1da177e4 1822 encap->tot_len == 0 ||
b1879204
IJ
1823 ntohs(encap->tot_len) + pimlen > skb->len)
1824 return 1;
1da177e4
LT
1825
1826 read_lock(&mrt_lock);
0c12295a
PM
1827 if (mrt->mroute_reg_vif_num >= 0)
1828 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1da177e4
LT
1829 if (reg_dev)
1830 dev_hold(reg_dev);
1831 read_unlock(&mrt_lock);
1832
e905a9ed 1833 if (reg_dev == NULL)
b1879204 1834 return 1;
1da177e4 1835
b0e380b1 1836 skb->mac_header = skb->network_header;
1da177e4 1837 skb_pull(skb, (u8*)encap - skb->data);
31c7711b 1838 skb_reset_network_header(skb);
1da177e4
LT
1839 skb->protocol = htons(ETH_P_IP);
1840 skb->ip_summed = 0;
1841 skb->pkt_type = PACKET_HOST;
d19d56dd
ED
1842
1843 skb_tunnel_rx(skb, reg_dev);
1844
1da177e4
LT
1845 netif_rx(skb);
1846 dev_put(reg_dev);
b1879204 1847
1da177e4 1848 return 0;
b1879204
IJ
1849}
1850#endif
1851
1852#ifdef CONFIG_IP_PIMSM_V1
1853/*
1854 * Handle IGMP messages of PIMv1
1855 */
1856
1857int pim_rcv_v1(struct sk_buff * skb)
1858{
1859 struct igmphdr *pim;
4feb88e5 1860 struct net *net = dev_net(skb->dev);
f0ad0860 1861 struct mr_table *mrt;
b1879204
IJ
1862
1863 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1864 goto drop;
1865
1866 pim = igmp_hdr(skb);
1867
f0ad0860
PM
1868 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1869 goto drop;
1870
0c12295a 1871 if (!mrt->mroute_do_pim ||
b1879204
IJ
1872 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1873 goto drop;
1874
f0ad0860 1875 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
1876drop:
1877 kfree_skb(skb);
1878 }
1da177e4
LT
1879 return 0;
1880}
1881#endif
1882
1883#ifdef CONFIG_IP_PIMSM_V2
1884static int pim_rcv(struct sk_buff * skb)
1885{
1886 struct pimreghdr *pim;
f0ad0860
PM
1887 struct net *net = dev_net(skb->dev);
1888 struct mr_table *mrt;
1da177e4 1889
b1879204 1890 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1da177e4
LT
1891 goto drop;
1892
9c70220b 1893 pim = (struct pimreghdr *)skb_transport_header(skb);
e905a9ed 1894 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1da177e4 1895 (pim->flags&PIM_NULL_REGISTER) ||
e905a9ed 1896 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
d3bc23e7 1897 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1da177e4
LT
1898 goto drop;
1899
f0ad0860
PM
1900 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1901 goto drop;
1902
1903 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
1904drop:
1905 kfree_skb(skb);
1906 }
1da177e4
LT
1907 return 0;
1908}
1909#endif
1910
cb6a4e46
PM
1911static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1912 struct mfc_cache *c, struct rtmsg *rtm)
1da177e4
LT
1913{
1914 int ct;
1915 struct rtnexthop *nhp;
27a884dc 1916 u8 *b = skb_tail_pointer(skb);
1da177e4
LT
1917 struct rtattr *mp_head;
1918
7438189b 1919 /* If cache is unresolved, don't try to parse IIF and OIF */
ed0f160a 1920 if (c->mfc_parent >= MAXVIFS)
7438189b
ND
1921 return -ENOENT;
1922
0c12295a
PM
1923 if (VIF_EXISTS(mrt, c->mfc_parent))
1924 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1da177e4 1925
c354e124 1926 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1da177e4
LT
1927
1928 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
0c12295a 1929 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1da177e4
LT
1930 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1931 goto rtattr_failure;
c354e124 1932 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1da177e4
LT
1933 nhp->rtnh_flags = 0;
1934 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
0c12295a 1935 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1da177e4
LT
1936 nhp->rtnh_len = sizeof(*nhp);
1937 }
1938 }
1939 mp_head->rta_type = RTA_MULTIPATH;
27a884dc 1940 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1da177e4
LT
1941 rtm->rtm_type = RTN_MULTICAST;
1942 return 1;
1943
1944rtattr_failure:
dc5fc579 1945 nlmsg_trim(skb, b);
1da177e4
LT
1946 return -EMSGSIZE;
1947}
1948
4feb88e5
BT
1949int ipmr_get_route(struct net *net,
1950 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1da177e4
LT
1951{
1952 int err;
f0ad0860 1953 struct mr_table *mrt;
1da177e4 1954 struct mfc_cache *cache;
511c3f92 1955 struct rtable *rt = skb_rtable(skb);
1da177e4 1956
f0ad0860
PM
1957 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1958 if (mrt == NULL)
1959 return -ENOENT;
1960
1da177e4 1961 read_lock(&mrt_lock);
0c12295a 1962 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1da177e4 1963
c354e124 1964 if (cache == NULL) {
72287490 1965 struct sk_buff *skb2;
eddc9ec5 1966 struct iphdr *iph;
1da177e4
LT
1967 struct net_device *dev;
1968 int vif;
1969
1970 if (nowait) {
1971 read_unlock(&mrt_lock);
1972 return -EAGAIN;
1973 }
1974
1975 dev = skb->dev;
0c12295a 1976 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1da177e4
LT
1977 read_unlock(&mrt_lock);
1978 return -ENODEV;
1979 }
72287490
AK
1980 skb2 = skb_clone(skb, GFP_ATOMIC);
1981 if (!skb2) {
1982 read_unlock(&mrt_lock);
1983 return -ENOMEM;
1984 }
1985
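		/* No cache entry yet: queue a copy for resolution.  Only a
		 * minimal IP header carrying the addresses is built, and
		 * iph->version is left at 0 so that the resolver
		 * (ipmr_cache_resolve) can tell this skb apart from a real
		 * queued packet and answer the pending request instead.
		 */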
e2d1bca7
ACM
1986 skb_push(skb2, sizeof(struct iphdr));
1987 skb_reset_network_header(skb2);
eddc9ec5
ACM
1988 iph = ip_hdr(skb2);
1989 iph->ihl = sizeof(struct iphdr) >> 2;
1990 iph->saddr = rt->rt_src;
1991 iph->daddr = rt->rt_dst;
1992 iph->version = 0;
0c12295a 1993 err = ipmr_cache_unresolved(mrt, vif, skb2);
1da177e4
LT
1994 read_unlock(&mrt_lock);
1995 return err;
1996 }
1997
1998 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1999 cache->mfc_flags |= MFC_NOTIFY;
cb6a4e46 2000 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1da177e4
LT
2001 read_unlock(&mrt_lock);
2002 return err;
2003}
2004
cb6a4e46
PM
2005static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2006 u32 pid, u32 seq, struct mfc_cache *c)
2007{
2008 struct nlmsghdr *nlh;
2009 struct rtmsg *rtm;
2010
2011 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2012 if (nlh == NULL)
2013 return -EMSGSIZE;
2014
2015 rtm = nlmsg_data(nlh);
2016 rtm->rtm_family = RTNL_FAMILY_IPMR;
2017 rtm->rtm_dst_len = 32;
2018 rtm->rtm_src_len = 32;
2019 rtm->rtm_tos = 0;
2020 rtm->rtm_table = mrt->id;
2021 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2022 rtm->rtm_type = RTN_MULTICAST;
2023 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2024 rtm->rtm_protocol = RTPROT_UNSPEC;
2025 rtm->rtm_flags = 0;
2026
2027 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2028 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2029
2030 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2031 goto nla_put_failure;
2032
2033 return nlmsg_end(skb, nlh);
2034
2035nla_put_failure:
2036 nlmsg_cancel(skb, nlh);
2037 return -EMSGSIZE;
2038}
2039
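/*
 * Dump all resolved multicast routes to user space.  The dump position
 * is saved in cb->args[]: table index, hash bucket and entry index, so
 * that an interrupted dump can resume where it left off.
 */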
2040static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2041{
2042 struct net *net = sock_net(skb->sk);
2043 struct mr_table *mrt;
2044 struct mfc_cache *mfc;
2045 unsigned int t = 0, s_t;
2046 unsigned int h = 0, s_h;
2047 unsigned int e = 0, s_e;
2048
2049 s_t = cb->args[0];
2050 s_h = cb->args[1];
2051 s_e = cb->args[2];
2052
2053 read_lock(&mrt_lock);
2054 ipmr_for_each_table(mrt, net) {
2055 if (t < s_t)
2056 goto next_table;
2057 if (t > s_t)
2058 s_h = 0;
2059 for (h = s_h; h < MFC_LINES; h++) {
2060 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2061 if (e < s_e)
2062 goto next_entry;
2063 if (ipmr_fill_mroute(mrt, skb,
2064 NETLINK_CB(cb->skb).pid,
2065 cb->nlh->nlmsg_seq,
2066 mfc) < 0)
2067 goto done;
2068next_entry:
2069 e++;
2070 }
2071 e = s_e = 0;
2072 }
2073 s_h = 0;
2074next_table:
2075 t++;
2076 }
2077done:
2078 read_unlock(&mrt_lock);
2079
2080 cb->args[2] = e;
2081 cb->args[1] = h;
2082 cb->args[0] = t;
2083
2084 return skb->len;
2085}
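/*
 * For illustration only (not part of ipmr.c): a minimal user-space sketch
 * showing how the dump interface registered below in ip_mr_init() via
 * rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, ...) can be exercised.
 * It sends an RTM_GETROUTE dump request with rtm_family set to
 * RTNL_FAMILY_IPMR and reports the size of each multicast route message
 * it receives; error handling is kept to the bare minimum.
 */
#if 0	/* user-space example, never compiled as part of the kernel */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} req;
	char buf[8192];
	int fd, len;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.rtm.rtm_family = RTNL_FAMILY_IPMR;

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	/* Walk every netlink message in each received datagram */
	while ((len = recv(fd, buf, sizeof(buf), 0)) > 0) {
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
			if (nlh->nlmsg_type == NLMSG_DONE)
				goto out;
			printf("multicast route message, %u bytes\n",
			       nlh->nlmsg_len);
		}
	}
out:
	close(fd);
	return 0;
}
#endif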
2086
e905a9ed 2087#ifdef CONFIG_PROC_FS
1da177e4
LT
2088/*
2089 * The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
2090 */
2091struct ipmr_vif_iter {
f6bb4514 2092 struct seq_net_private p;
f0ad0860 2093 struct mr_table *mrt;
1da177e4
LT
2094 int ct;
2095};
2096
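/*
 * Return the vif at logical position 'pos', skipping unused slots in
 * vif_table, or NULL when the table is exhausted.  Called with mrt_lock
 * held by the seq_file start/next callbacks.
 */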
f6bb4514
BT
2097static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2098 struct ipmr_vif_iter *iter,
1da177e4
LT
2099 loff_t pos)
2100{
f0ad0860 2101 struct mr_table *mrt = iter->mrt;
0c12295a
PM
2102
2103 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2104 if (!VIF_EXISTS(mrt, iter->ct))
1da177e4 2105 continue;
e905a9ed 2106 if (pos-- == 0)
0c12295a 2107 return &mrt->vif_table[iter->ct];
1da177e4
LT
2108 }
2109 return NULL;
2110}
2111
2112static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
ba93ef74 2113 __acquires(mrt_lock)
1da177e4 2114{
f0ad0860 2115 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 2116 struct net *net = seq_file_net(seq);
f0ad0860
PM
2117 struct mr_table *mrt;
2118
2119 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2120 if (mrt == NULL)
2121 return ERR_PTR(-ENOENT);
2122
2123 iter->mrt = mrt;
f6bb4514 2124
1da177e4 2125 read_lock(&mrt_lock);
f6bb4514 2126 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
2127 : SEQ_START_TOKEN;
2128}
2129
2130static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2131{
2132 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 2133 struct net *net = seq_file_net(seq);
f0ad0860 2134 struct mr_table *mrt = iter->mrt;
1da177e4
LT
2135
2136 ++*pos;
2137 if (v == SEQ_START_TOKEN)
f6bb4514 2138 return ipmr_vif_seq_idx(net, iter, 0);
e905a9ed 2139
0c12295a
PM
2140 while (++iter->ct < mrt->maxvif) {
2141 if (!VIF_EXISTS(mrt, iter->ct))
1da177e4 2142 continue;
0c12295a 2143 return &mrt->vif_table[iter->ct];
1da177e4
LT
2144 }
2145 return NULL;
2146}
2147
2148static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
ba93ef74 2149 __releases(mrt_lock)
1da177e4
LT
2150{
2151 read_unlock(&mrt_lock);
2152}
2153
2154static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2155{
f0ad0860
PM
2156 struct ipmr_vif_iter *iter = seq->private;
2157 struct mr_table *mrt = iter->mrt;
f6bb4514 2158
1da177e4 2159 if (v == SEQ_START_TOKEN) {
e905a9ed 2160 seq_puts(seq,
1da177e4
LT
2161 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2162 } else {
2163 const struct vif_device *vif = v;
2164 const char *name = vif->dev ? vif->dev->name : "none";
2165
2166 seq_printf(seq,
2167 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
0c12295a 2168 vif - mrt->vif_table,
e905a9ed 2169 name, vif->bytes_in, vif->pkt_in,
1da177e4
LT
2170 vif->bytes_out, vif->pkt_out,
2171 vif->flags, vif->local, vif->remote);
2172 }
2173 return 0;
2174}
2175
f690808e 2176static const struct seq_operations ipmr_vif_seq_ops = {
1da177e4
LT
2177 .start = ipmr_vif_seq_start,
2178 .next = ipmr_vif_seq_next,
2179 .stop = ipmr_vif_seq_stop,
2180 .show = ipmr_vif_seq_show,
2181};
2182
2183static int ipmr_vif_open(struct inode *inode, struct file *file)
2184{
f6bb4514
BT
2185 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2186 sizeof(struct ipmr_vif_iter));
1da177e4
LT
2187}
2188
9a32144e 2189static const struct file_operations ipmr_vif_fops = {
1da177e4
LT
2190 .owner = THIS_MODULE,
2191 .open = ipmr_vif_open,
2192 .read = seq_read,
2193 .llseek = seq_lseek,
f6bb4514 2194 .release = seq_release_net,
1da177e4
LT
2195};
2196
2197struct ipmr_mfc_iter {
f6bb4514 2198 struct seq_net_private p;
f0ad0860 2199 struct mr_table *mrt;
862465f2 2200 struct list_head *cache;
1da177e4
LT
2201 int ct;
2202};
2203
2204
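/*
 * The mfc iterator first walks the resolved cache buckets under mrt_lock
 * and then the unresolved queue under mfc_unres_lock.  it->cache records
 * which list is currently being walked so that ipmr_mfc_seq_stop() can
 * drop the lock that is actually held.
 */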
f6bb4514
BT
2205static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2206 struct ipmr_mfc_iter *it, loff_t pos)
1da177e4 2207{
f0ad0860 2208 struct mr_table *mrt = it->mrt;
1da177e4
LT
2209 struct mfc_cache *mfc;
2210
1da177e4 2211 read_lock(&mrt_lock);
862465f2 2212 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
0c12295a 2213 it->cache = &mrt->mfc_cache_array[it->ct];
862465f2 2214 list_for_each_entry(mfc, it->cache, list)
e905a9ed 2215 if (pos-- == 0)
1da177e4 2216 return mfc;
862465f2 2217 }
1da177e4
LT
2218 read_unlock(&mrt_lock);
2219
1da177e4 2220 spin_lock_bh(&mfc_unres_lock);
0c12295a 2221 it->cache = &mrt->mfc_unres_queue;
862465f2 2222 list_for_each_entry(mfc, it->cache, list)
e258beb2 2223 if (pos-- == 0)
1da177e4
LT
2224 return mfc;
2225 spin_unlock_bh(&mfc_unres_lock);
2226
2227 it->cache = NULL;
2228 return NULL;
2229}
2230
2231
2232static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2233{
2234 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 2235 struct net *net = seq_file_net(seq);
f0ad0860 2236 struct mr_table *mrt;
f6bb4514 2237
f0ad0860
PM
2238 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2239 if (mrt == NULL)
2240 return ERR_PTR(-ENOENT);
f6bb4514 2241
f0ad0860 2242 it->mrt = mrt;
1da177e4
LT
2243 it->cache = NULL;
2244 it->ct = 0;
f6bb4514 2245 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
2246 : SEQ_START_TOKEN;
2247}
2248
2249static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2250{
2251 struct mfc_cache *mfc = v;
2252 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 2253 struct net *net = seq_file_net(seq);
f0ad0860 2254 struct mr_table *mrt = it->mrt;
1da177e4
LT
2255
2256 ++*pos;
2257
2258 if (v == SEQ_START_TOKEN)
f6bb4514 2259 return ipmr_mfc_seq_idx(net, seq->private, 0);
1da177e4 2260
862465f2
PM
2261 if (mfc->list.next != it->cache)
2262 return list_entry(mfc->list.next, struct mfc_cache, list);
e905a9ed 2263
0c12295a 2264 if (it->cache == &mrt->mfc_unres_queue)
1da177e4
LT
2265 goto end_of_list;
2266
0c12295a 2267 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
1da177e4
LT
2268
2269 while (++it->ct < MFC_LINES) {
0c12295a 2270 it->cache = &mrt->mfc_cache_array[it->ct];
862465f2
PM
2271 if (list_empty(it->cache))
2272 continue;
2273 return list_first_entry(it->cache, struct mfc_cache, list);
1da177e4
LT
2274 }
2275
2276 /* exhausted cache_array, show unresolved */
2277 read_unlock(&mrt_lock);
0c12295a 2278 it->cache = &mrt->mfc_unres_queue;
1da177e4 2279 it->ct = 0;
e905a9ed 2280
1da177e4 2281 spin_lock_bh(&mfc_unres_lock);
862465f2
PM
2282 if (!list_empty(it->cache))
2283 return list_first_entry(it->cache, struct mfc_cache, list);
1da177e4
LT
2284
2285 end_of_list:
2286 spin_unlock_bh(&mfc_unres_lock);
2287 it->cache = NULL;
2288
2289 return NULL;
2290}
2291
2292static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2293{
2294 struct ipmr_mfc_iter *it = seq->private;
f0ad0860 2295 struct mr_table *mrt = it->mrt;
1da177e4 2296
0c12295a 2297 if (it->cache == &mrt->mfc_unres_queue)
1da177e4 2298 spin_unlock_bh(&mfc_unres_lock);
0c12295a 2299 else if (it->cache == &mrt->mfc_cache_array[it->ct])
1da177e4
LT
2300 read_unlock(&mrt_lock);
2301}
2302
2303static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2304{
2305 int n;
2306
2307 if (v == SEQ_START_TOKEN) {
e905a9ed 2308 seq_puts(seq,
1da177e4
LT
2309 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2310 } else {
2311 const struct mfc_cache *mfc = v;
2312 const struct ipmr_mfc_iter *it = seq->private;
f0ad0860 2313 const struct mr_table *mrt = it->mrt;
e905a9ed 2314
0eae88f3
ED
2315 seq_printf(seq, "%08X %08X %-3hd",
2316 (__force u32) mfc->mfc_mcastgrp,
2317 (__force u32) mfc->mfc_origin,
1ea472e2 2318 mfc->mfc_parent);
1da177e4 2319
0c12295a 2320 if (it->cache != &mrt->mfc_unres_queue) {
1ea472e2
BT
2321 seq_printf(seq, " %8lu %8lu %8lu",
2322 mfc->mfc_un.res.pkt,
2323 mfc->mfc_un.res.bytes,
2324 mfc->mfc_un.res.wrong_if);
132adf54
SH
2325 for (n = mfc->mfc_un.res.minvif;
2326				    n < mfc->mfc_un.res.maxvif; n++) {
0c12295a 2327 if (VIF_EXISTS(mrt, n) &&
cf958ae3
BT
2328 mfc->mfc_un.res.ttls[n] < 255)
2329 seq_printf(seq,
e905a9ed 2330 " %2d:%-3d",
1da177e4
LT
2331 n, mfc->mfc_un.res.ttls[n]);
2332 }
1ea472e2
BT
2333 } else {
2334 /* unresolved mfc_caches don't contain
2335 * pkt, bytes and wrong_if values
2336 */
2337 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1da177e4
LT
2338 }
2339 seq_putc(seq, '\n');
2340 }
2341 return 0;
2342}
2343
f690808e 2344static const struct seq_operations ipmr_mfc_seq_ops = {
1da177e4
LT
2345 .start = ipmr_mfc_seq_start,
2346 .next = ipmr_mfc_seq_next,
2347 .stop = ipmr_mfc_seq_stop,
2348 .show = ipmr_mfc_seq_show,
2349};
2350
2351static int ipmr_mfc_open(struct inode *inode, struct file *file)
2352{
f6bb4514
BT
2353 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2354 sizeof(struct ipmr_mfc_iter));
1da177e4
LT
2355}
2356
9a32144e 2357static const struct file_operations ipmr_mfc_fops = {
1da177e4
LT
2358 .owner = THIS_MODULE,
2359 .open = ipmr_mfc_open,
2360 .read = seq_read,
2361 .llseek = seq_lseek,
f6bb4514 2362 .release = seq_release_net,
1da177e4 2363};
e905a9ed 2364#endif
1da177e4
LT
2365
2366#ifdef CONFIG_IP_PIMSM_V2
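/* Handler for IPPROTO_PIM (protocol 103), registered from ip_mr_init()
 * so that PIMv2 messages are delivered to pim_rcv().
 */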
32613090 2367static const struct net_protocol pim_protocol = {
1da177e4 2368 .handler = pim_rcv,
403dbb97 2369 .netns_ok = 1,
1da177e4
LT
2370};
2371#endif
2372
2373
2374/*
2375 * Setup for IP multicast routing
2376 */
cf958ae3
BT
2377static int __net_init ipmr_net_init(struct net *net)
2378{
f0ad0860 2379 int err;
cf958ae3 2380
f0ad0860
PM
2381 err = ipmr_rules_init(net);
2382 if (err < 0)
cf958ae3 2383 goto fail;
f6bb4514
BT
2384
2385#ifdef CONFIG_PROC_FS
2386 err = -ENOMEM;
2387 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2388 goto proc_vif_fail;
2389 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2390 goto proc_cache_fail;
2391#endif
2bb8b26c
BT
2392 return 0;
2393
f6bb4514
BT
2394#ifdef CONFIG_PROC_FS
2395proc_cache_fail:
2396 proc_net_remove(net, "ip_mr_vif");
2397proc_vif_fail:
f0ad0860 2398 ipmr_rules_exit(net);
f6bb4514 2399#endif
cf958ae3
BT
2400fail:
2401 return err;
2402}
2403
2404static void __net_exit ipmr_net_exit(struct net *net)
2405{
f6bb4514
BT
2406#ifdef CONFIG_PROC_FS
2407 proc_net_remove(net, "ip_mr_cache");
2408 proc_net_remove(net, "ip_mr_vif");
2409#endif
f0ad0860 2410 ipmr_rules_exit(net);
cf958ae3
BT
2411}
2412
2413static struct pernet_operations ipmr_net_ops = {
2414 .init = ipmr_net_init,
2415 .exit = ipmr_net_exit,
2416};
e905a9ed 2417
03d2f897 2418int __init ip_mr_init(void)
1da177e4 2419{
03d2f897
WC
2420 int err;
2421
1da177e4
LT
2422 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2423 sizeof(struct mfc_cache),
e5d679f3 2424 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
20c2df83 2425 NULL);
03d2f897
WC
2426 if (!mrt_cachep)
2427 return -ENOMEM;
2428
cf958ae3
BT
2429 err = register_pernet_subsys(&ipmr_net_ops);
2430 if (err)
2431 goto reg_pernet_fail;
2432
03d2f897
WC
2433 err = register_netdevice_notifier(&ip_mr_notifier);
2434 if (err)
2435 goto reg_notif_fail;
403dbb97
TG
2436#ifdef CONFIG_IP_PIMSM_V2
2437 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2438 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2439 err = -EAGAIN;
2440 goto add_proto_fail;
2441 }
2442#endif
cb6a4e46 2443 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
03d2f897 2444 return 0;
f6bb4514 2445
403dbb97
TG
2446#ifdef CONFIG_IP_PIMSM_V2
2447add_proto_fail:
2448 unregister_netdevice_notifier(&ip_mr_notifier);
2449#endif
c3e38896 2450reg_notif_fail:
cf958ae3
BT
2451 unregister_pernet_subsys(&ipmr_net_ops);
2452reg_pernet_fail:
c3e38896 2453 kmem_cache_destroy(mrt_cachep);
03d2f897 2454 return err;
1da177e4 2455}