ipv4: ipmr: move mroute data into separate structure
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

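/*
 * Per-namespace multicast routing state: the mroute control socket, the
 * virtual interface (VIF) table, the resolved (S,G) cache hash and the
 * queue of unresolved entries with its expiry timer.  As of this patch
 * there is exactly one table per network namespace (net->ipv4.mrt).
 */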
struct mr_table {
	struct net		*net;	/* back-pointer, used by ipmr_destroy_unres() */
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    struct mfc_cache *c, struct rtmsg *rtm);

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

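/*
 * Tear down the dvmrpN tunnel backing a VIF.  The tunnel belongs to the
 * tunl0 driver, so we rebuild its parameters and invoke the driver's
 * SIOCDELTUNNEL ioctl from kernel context (hence the temporary
 * set_fs(KERNEL_DS) around the call).
 */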
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

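/*
 * The "pimreg" register VIF.  Packets the forwarding path transmits on
 * this device are not sent anywhere; they are bounced back up to the
 * user-space PIM daemon as IGMPMSG_WHOLEPKT upcalls.
 */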
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt = net->ipv4.mrt;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = mrt->net;
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

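/* Look up a resolved (origin, group) entry; called with mrt_lock held. */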
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

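/*
 * Add or update an (S,G) entry.  If a matching entry was sitting on the
 * unresolved queue it is taken over: its queued packets are replayed
 * through ipmr_cache_resolve() and the temporary entry is freed.
 */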
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt = net->ipv4.mrt;

	rtnl_lock();
	if (sk == mrt->mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		mrt->mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(mrt);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

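/*
 * For orientation, a minimal user-space sketch of how an mrouted/pimd
 * style daemon drives this interface (illustrative only, not kernel
 * code; error handling omitted, addresses are documentation examples):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	struct mfcctl mc = { .mfcc_parent = 0 };
 *	mc.mfcc_origin.s_addr   = inet_addr("192.0.2.9");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("233.252.0.1");
 *	mc.mfcc_ttls[1] = 1;	// forward to vif 1, added the same way
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 * The daemon then reads IGMPMSG_* upcalls (struct igmpmsg) from the
 * same raw socket.
 */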
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt = net->ipv4.mrt;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt = net->ipv4.mrt;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt = net->ipv4.mrt;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt = net->ipv4.mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &mrt->vif_table[0];
	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(mrt, ct, 1, &list);
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

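/*
 * Entry point from the receive path: the IPv4 routing code installs
 * this as the dst input function for multicast destinations when the
 * incoming device has multicast forwarding enabled.
 */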
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt = net->ipv4.mrt;
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
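/*
 * Common PIMv1/PIMv2 REGISTER decapsulation: validate the encapsulated
 * packet, then re-inject it on the pimreg device so it re-enters the
 * stack as if received there.  Returns 0 on success (skb consumed) and
 * 1 on error, in which case the caller frees the skb.
 */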
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt = net->ipv4.mrt;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt = net->ipv4.mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

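/*
 * Dump one MFC entry into an rtnetlink message: the incoming interface
 * of the parent VIF as RTA_IIF, plus one RTA_MULTIPATH nexthop per
 * forwarding VIF with the TTL threshold encoded in rtnh_hops.
 */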
static int
ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
		 struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent > MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

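/*
 * rtnetlink route lookup for a multicast (src, dst) pair.  If no
 * resolved cache entry exists and 'nowait' is not set, a clone of the
 * skb is dressed up with a version-0 IP header and queued on the
 * unresolved list, which triggers an IGMPMSG_NOCACHE upcall.
 */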
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt = net->ipv4.mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = net->ipv4.mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = net->ipv4.mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = net->ipv4.mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct list_head *cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = net->ipv4.mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = net->ipv4.mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = net->ipv4.mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = net->ipv4.mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	struct mr_table *mrt;
	unsigned int i;
	int err = 0;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto fail;
	}
	mrt->net = net;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	/* the timer data must match the cast in ipmr_expire_process() */
	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif

	net->ipv4.mrt = mrt;
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(mrt);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mrt);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}