]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipmr.c
ipv4: ipmr: convert struct mfc_cache to struct list_head
[net-next-2.6.git] / net / ipv4 / ipmr.c
CommitLineData
1da177e4
LT
1/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
113aa838 4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
1da177e4
LT
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
 25 * Relax this requirement to work with older peers.
26 *
27 */
28
1da177e4
LT
29#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
4fc268d2 32#include <linux/capability.h>
1da177e4
LT
33#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
46f25dff 49#include <linux/if_ether.h>
5a0e3ad6 50#include <linux/slab.h>
457c4cbc 51#include <net/net_namespace.h>
1da177e4
LT
52#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
14c85021 55#include <net/route.h>
1da177e4
LT
56#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
dc5fc579 65#include <net/netlink.h>
1da177e4
LT
66
67#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68#define CONFIG_IP_PIMSM 1
69#endif
70
1da177e4
LT
71/* Big lock, protecting vif table, mrt cache and mroute socket state.
72 Note that the changes are semaphored via rtnl_lock.
73 */
74
75static DEFINE_RWLOCK(mrt_lock);
76
77/*
78 * Multicast router control variables
79 */
80
cf958ae3 81#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
1da177e4 82
1da177e4
LT
83/* Special spinlock for queue of unresolved entries */
84static DEFINE_SPINLOCK(mfc_unres_lock);
85
86/* We return to original Alan's scheme. Hash table of resolved
87 entries is changed only in process context and protected
88 with weak lock mrt_lock. Queue of unresolved entries is protected
89 with strong spinlock mfc_unres_lock.
90
91 In this case data path is free of exclusive locks at all.
92 */
93
e18b890b 94static struct kmem_cache *mrt_cachep __read_mostly;
1da177e4 95
d658f8a0
PM
96static int ip_mr_forward(struct net *net, struct sk_buff *skb,
97 struct mfc_cache *cache, int local);
4feb88e5
BT
98static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
d658f8a0
PM
100static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb,
101 struct mfc_cache *c, struct rtmsg *rtm);
1da177e4 102
1da177e4
LT
103/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
104
d607032d
WC
/*
 * Tear down the DVMRP tunnel backing a vif: close the device, then ask
 * the "tunl0" master device to delete the "dvmrp%d" tunnel via its
 * SIOCDELTUNNEL ioctl (issued from kernel space with KERNEL_DS).
 */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		/* Rebuild the same tunnel parameters vif_add() used. */
		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			/* The ioctl expects a user pointer; lift the
			 * address-limit check for this kernel buffer. */
			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}
135
1da177e4 136static
4feb88e5 137struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
1da177e4
LT
138{
139 struct net_device *dev;
140
4feb88e5 141 dev = __dev_get_by_name(net, "tunl0");
1da177e4
LT
142
143 if (dev) {
5bc3eb7e 144 const struct net_device_ops *ops = dev->netdev_ops;
1da177e4
LT
145 int err;
146 struct ifreq ifr;
1da177e4
LT
147 struct ip_tunnel_parm p;
148 struct in_device *in_dev;
149
150 memset(&p, 0, sizeof(p));
151 p.iph.daddr = v->vifc_rmt_addr.s_addr;
152 p.iph.saddr = v->vifc_lcl_addr.s_addr;
153 p.iph.version = 4;
154 p.iph.ihl = 5;
155 p.iph.protocol = IPPROTO_IPIP;
156 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
ba93ef74 157 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
1da177e4 158
5bc3eb7e
SH
159 if (ops->ndo_do_ioctl) {
160 mm_segment_t oldfs = get_fs();
161
162 set_fs(KERNEL_DS);
163 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
164 set_fs(oldfs);
165 } else
166 err = -EOPNOTSUPP;
1da177e4
LT
167
168 dev = NULL;
169
4feb88e5
BT
170 if (err == 0 &&
171 (dev = __dev_get_by_name(net, p.name)) != NULL) {
1da177e4
LT
172 dev->flags |= IFF_MULTICAST;
173
e5ed6399 174 in_dev = __in_dev_get_rtnl(dev);
71e27da9 175 if (in_dev == NULL)
1da177e4 176 goto failure;
71e27da9
HX
177
178 ipv4_devconf_setall(in_dev);
179 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
1da177e4
LT
180
181 if (dev_open(dev))
182 goto failure;
7dc00c82 183 dev_hold(dev);
1da177e4
LT
184 }
185 }
186 return dev;
187
188failure:
189 /* allow the register to be completed before unregistering. */
190 rtnl_unlock();
191 rtnl_lock();
192
193 unregister_netdevice(dev);
194 return NULL;
195}
196
197#ifdef CONFIG_IP_PIMSM
198
/*
 * Transmit hook of the PIM register device ("pimreg"): packets routed
 * out of this vif are not sent on the wire but reported to the mroute
 * daemon as IGMPMSG_WHOLEPKT, then dropped.  Always consumes the skb.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	/* mrt_lock guards mroute_reg_vif_num against concurrent updates. */
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
212
007c3838
SH
/* Device ops for the PIM register vif: only transmit is meaningful. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
216
1da177e4
LT
217static void reg_vif_setup(struct net_device *dev)
218{
219 dev->type = ARPHRD_PIMREG;
46f25dff 220 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
1da177e4 221 dev->flags = IFF_NOARP;
007c3838 222 dev->netdev_ops = &reg_vif_netdev_ops,
1da177e4 223 dev->destructor = free_netdev;
403dbb97 224 dev->features |= NETIF_F_NETNS_LOCAL;
1da177e4
LT
225}
226
/*
 * Allocate, register and open the PIM register device for @net.
 * Clears rp_filter on it so locally looped register traffic is not
 * dropped.  Returns the held device, or NULL on any failure.
 */
static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	/* Zero priv size: all state lives in the net_device itself. */
	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	/* Reference owned by the vif table; dropped in vif_delete(). */
	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
270#endif
271
272/*
273 * Delete a VIF entry
7dc00c82 274 * @notify: Set to 1, if the caller is a notifier_call
1da177e4 275 */
e905a9ed 276
d17fa6fa
ED
/*
 * Delete a VIF entry
 * @notify: Set to 1, if the caller is a notifier_call
 *
 * Clears the vif slot under mrt_lock, shrinks maxvif if the last slot
 * was removed, undoes the allmulti/mc_forwarding accounting and drops
 * the device reference taken in vif_add().  @head collects devices for
 * a batched unregister_netdevice_many() by the caller (may be NULL
 * when no tunnel/register device needs unregistering).
 */
static int vif_delete(struct net *net, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	/* Deleted the highest vif: scan down for the new maxvif. */
	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	/* Tunnel/register devices were created by us: unregister them,
	 * unless the device itself is already going away (notifier). */
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
327
5c0a66f5
BT
/* Return an mfc cache entry to the mrt_cachep slab. */
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
332
1da177e4
LT
333/* Destroy an unresolved cache entry, killing queued skbs
334 and reporting error to netlink readers.
335 */
336
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		/* version == 0 marks a queued netlink request skb (see
		 * ipmr_cache_resolve()); answer it with -ETIMEDOUT. */
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}
361
362
e258beb2 363/* Timer process for the unresolved queue. */
1da177e4 364
/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct net *net = (struct net *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	/* Timer context: never spin on the lock, just retry shortly. */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&net->ipv4.mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;	/* upper bound on the re-arm interval */

	list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* Not expired yet: remember the soonest deadline. */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(net, c);
	}

	if (!list_empty(&net->ipv4.mfc_unres_queue))
		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
401
402/* Fill oifs list. It is called under write locked mrt_lock. */
403
d658f8a0
PM
/* Fill oifs list. It is called under write locked mrt_lock. */

/*
 * Rebuild the per-vif TTL threshold array of @cache from the daemon's
 * @ttls, and recompute the [minvif, maxvif) range of vifs that have a
 * usable threshold (1..254; 0 and 255 mean "do not forward").
 */
static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
424
/*
 * Install a new virtual interface described by @vifc.  Depending on
 * vifc_flags the backing device is the PIM register device, a freshly
 * created DVMRP tunnel, or an existing device found by ifindex or
 * local address.  @mrtsock distinguishes daemon-installed vifs from
 * static ones.  Called under RTNL; publishes the vif under mrt_lock.
 */
static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			/* Device without IPv4 configuration is unusable. */
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	/* Setting v->dev publishes the vif (VIF_EXISTS becomes true). */
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
529
4feb88e5
BT
530static struct mfc_cache *ipmr_cache_find(struct net *net,
531 __be32 origin,
532 __be32 mcastgrp)
1da177e4 533{
c354e124 534 int line = MFC_HASH(mcastgrp, origin);
1da177e4
LT
535 struct mfc_cache *c;
536
862465f2
PM
537 list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
538 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
539 return c;
1da177e4 540 }
862465f2 541 return NULL;
1da177e4
LT
542}
543
544/*
545 * Allocate a multicast cache entry
546 */
d658f8a0 547static struct mfc_cache *ipmr_cache_alloc(void)
1da177e4 548{
c354e124
JK
549 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
550 if (c == NULL)
1da177e4 551 return NULL;
1da177e4
LT
552 c->mfc_un.res.minvif = MAXVIFS;
553 return c;
554}
555
d658f8a0 556static struct mfc_cache *ipmr_cache_alloc_unres(void)
1da177e4 557{
c354e124
JK
558 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
559 if (c == NULL)
1da177e4 560 return NULL;
1da177e4
LT
561 skb_queue_head_init(&c->mfc_un.unres.unresolved);
562 c->mfc_un.unres.expires = jiffies + 10*HZ;
563 return c;
564}
565
566/*
567 * A cache entry has gone into a resolved state from queued
568 */
e905a9ed 569
d658f8a0
PM
/*
 * A cache entry has gone into a resolved state from queued
 */

/*
 * Drain the skbs queued on unresolved entry @uc: forward data packets
 * through the freshly resolved route @c, and answer queued netlink
 * route requests (marked by ip version 0) in place.
 */
static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
			       struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				/* Fill failed: report -EMSGSIZE instead. */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, skb, c, 0);
	}
}
601
602/*
603 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
604 * expects the following bizarre scheme.
605 *
606 * Called under mrt_lock.
607 */
e905a9ed 608
4feb88e5
BT
/*
 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 * expects the following bizarre scheme.
 *
 * Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	/* WHOLEPKT: ship a copy of the entire packet to the daemon. */
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	/* No daemon listening: nothing to report to. */
	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
691
692/*
693 * Queue a packet for resolution. It gets locked cache entry!
694 */
e905a9ed 695
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	/* Is this (S,G) already awaiting resolution? */
	list_for_each_entry(c, &net->ipv4.mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* Cap the unresolved queue at 10 entries. */
		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		list_add(&c->list, &net->ipv4.mfc_unres_queue);

		mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	/* At most 4 packets queued per unresolved entry. */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
768
769/*
770 * MFC cache manipulation by user space mroute daemon
771 */
772
4feb88e5 773static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
1da177e4
LT
774{
775 int line;
862465f2 776 struct mfc_cache *c, *next;
1da177e4 777
c354e124 778 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 779
862465f2 780 list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[line], list) {
1da177e4
LT
781 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
782 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
783 write_lock_bh(&mrt_lock);
862465f2 784 list_del(&c->list);
1da177e4
LT
785 write_unlock_bh(&mrt_lock);
786
5c0a66f5 787 ipmr_cache_free(c);
1da177e4
LT
788 return 0;
789 }
790 }
791 return -ENOENT;
792}
793
4feb88e5 794static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
1da177e4 795{
862465f2 796 bool found = false;
1da177e4 797 int line;
862465f2 798 struct mfc_cache *uc, *c;
1da177e4 799
a50436f2
PM
800 if (mfc->mfcc_parent >= MAXVIFS)
801 return -ENFILE;
802
c354e124 803 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1da177e4 804
862465f2 805 list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
1da177e4 806 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
862465f2
PM
807 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
808 found = true;
1da177e4 809 break;
862465f2 810 }
1da177e4
LT
811 }
812
862465f2 813 if (found) {
1da177e4
LT
814 write_lock_bh(&mrt_lock);
815 c->mfc_parent = mfc->mfcc_parent;
d658f8a0 816 ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
1da177e4
LT
817 if (!mrtsock)
818 c->mfc_flags |= MFC_STATIC;
819 write_unlock_bh(&mrt_lock);
820 return 0;
821 }
822
f97c1e0c 823 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1da177e4
LT
824 return -EINVAL;
825
d658f8a0 826 c = ipmr_cache_alloc();
c354e124 827 if (c == NULL)
1da177e4
LT
828 return -ENOMEM;
829
c354e124
JK
830 c->mfc_origin = mfc->mfcc_origin.s_addr;
831 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
832 c->mfc_parent = mfc->mfcc_parent;
d658f8a0 833 ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
1da177e4
LT
834 if (!mrtsock)
835 c->mfc_flags |= MFC_STATIC;
836
837 write_lock_bh(&mrt_lock);
862465f2 838 list_add(&c->list, &net->ipv4.mfc_cache_array[line]);
1da177e4
LT
839 write_unlock_bh(&mrt_lock);
840
841 /*
842 * Check to see if we resolved a queued list. If so we
843 * need to send on the frames and tidy up.
844 */
845 spin_lock_bh(&mfc_unres_lock);
862465f2 846 list_for_each_entry(uc, &net->ipv4.mfc_unres_queue, list) {
e258beb2 847 if (uc->mfc_origin == c->mfc_origin &&
1da177e4 848 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
862465f2 849 list_del(&uc->list);
4feb88e5 850 atomic_dec(&net->ipv4.cache_resolve_queue_len);
1da177e4
LT
851 break;
852 }
853 }
862465f2 854 if (list_empty(&net->ipv4.mfc_unres_queue))
e258beb2 855 del_timer(&net->ipv4.ipmr_expire_timer);
1da177e4
LT
856 spin_unlock_bh(&mfc_unres_lock);
857
858 if (uc) {
d658f8a0 859 ipmr_cache_resolve(net, uc, c);
5c0a66f5 860 ipmr_cache_free(uc);
1da177e4
LT
861 }
862 return 0;
863}
864
865/*
866 * Close the multicast socket, and clear the vif tables etc
867 */
e905a9ed 868
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		/* Static vifs survive the daemon going away. */
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0, &list);
	}
	/* Batch-unregister the devices collected by vif_delete(). */
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(net, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
908
/*
 * ip_ra_control() destructor for the mroute control socket: when the
 * daemon's socket goes away, detach it and tear down all non-static
 * multicast routing state for its namespace.
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}
925
926/*
927 * Socket options and virtual interface manipulation. The whole
928 * virtual interface system is a complete heap, but unfortunately
929 * that's how BSD mrouted happens to think. Maybe one day with a proper
930 * MOSPF/PIM router set up we can clean this up.
931 */
e905a9ed 932
/*
 * setsockopt() handler for the multicast routing options (MRT_*).
 * Only the registered mroute socket (or CAP_NET_ADMIN) may use options
 * other than MRT_INIT.  Table mutations are serialized via rtnl_lock.
 */
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		/* Only a raw IGMP socket may become the mroute socket. */
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		/* Triggers mrtsock_destruct() and full table cleanup. */
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		/* Enabling/disabling PIM implies the matching assert mode. */
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1045
1046/*
1047 * Getsock opt support for the multicast routing system.
1048 */
e905a9ed 1049
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname!=MRT_PIM &&
#endif
	   optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Clamp the reply length to sizeof(int); note a negative olr is
	 * promoted to a huge unsigned value by min_t, so the check below
	 * is effectively never taken - kept as-is (historic behaviour). */
	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;	/* mrouted 3.5 compatibility version */
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
1084
1085/*
1086 * The IP multicast ioctl support routines.
1087 */
e905a9ed 1088
1da177e4
LT
1089int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1090{
1091 struct sioc_sg_req sr;
1092 struct sioc_vif_req vr;
1093 struct vif_device *vif;
1094 struct mfc_cache *c;
4feb88e5 1095 struct net *net = sock_net(sk);
e905a9ed 1096
132adf54
SH
1097 switch (cmd) {
1098 case SIOCGETVIFCNT:
c354e124 1099 if (copy_from_user(&vr, arg, sizeof(vr)))
132adf54 1100 return -EFAULT;
4feb88e5 1101 if (vr.vifi >= net->ipv4.maxvif)
132adf54
SH
1102 return -EINVAL;
1103 read_lock(&mrt_lock);
4feb88e5
BT
1104 vif = &net->ipv4.vif_table[vr.vifi];
1105 if (VIF_EXISTS(net, vr.vifi)) {
c354e124
JK
1106 vr.icount = vif->pkt_in;
1107 vr.ocount = vif->pkt_out;
1108 vr.ibytes = vif->bytes_in;
1109 vr.obytes = vif->bytes_out;
1da177e4 1110 read_unlock(&mrt_lock);
1da177e4 1111
c354e124 1112 if (copy_to_user(arg, &vr, sizeof(vr)))
132adf54
SH
1113 return -EFAULT;
1114 return 0;
1115 }
1116 read_unlock(&mrt_lock);
1117 return -EADDRNOTAVAIL;
1118 case SIOCGETSGCNT:
c354e124 1119 if (copy_from_user(&sr, arg, sizeof(sr)))
132adf54
SH
1120 return -EFAULT;
1121
1122 read_lock(&mrt_lock);
4feb88e5 1123 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
132adf54
SH
1124 if (c) {
1125 sr.pktcnt = c->mfc_un.res.pkt;
1126 sr.bytecnt = c->mfc_un.res.bytes;
1127 sr.wrong_if = c->mfc_un.res.wrong_if;
1da177e4 1128 read_unlock(&mrt_lock);
132adf54 1129
c354e124 1130 if (copy_to_user(arg, &sr, sizeof(sr)))
132adf54
SH
1131 return -EFAULT;
1132 return 0;
1133 }
1134 read_unlock(&mrt_lock);
1135 return -EADDRNOTAVAIL;
1136 default:
1137 return -ENOIOCTLCMD;
1da177e4
LT
1138 }
1139}
1140
1141
1142static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1143{
e9dc8653 1144 struct net_device *dev = ptr;
4feb88e5 1145 struct net *net = dev_net(dev);
1da177e4
LT
1146 struct vif_device *v;
1147 int ct;
d17fa6fa 1148 LIST_HEAD(list);
e9dc8653 1149
1da177e4
LT
1150 if (event != NETDEV_UNREGISTER)
1151 return NOTIFY_DONE;
4feb88e5
BT
1152 v = &net->ipv4.vif_table[0];
1153 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
c354e124 1154 if (v->dev == dev)
d17fa6fa 1155 vif_delete(net, ct, 1, &list);
1da177e4 1156 }
d17fa6fa 1157 unregister_netdevice_many(&list);
1da177e4
LT
1158 return NOTIFY_DONE;
1159}
1160
1161
c354e124 1162static struct notifier_block ip_mr_notifier = {
1da177e4
LT
1163 .notifier_call = ipmr_device_event,
1164};
1165
1166/*
1167 * Encapsulate a packet by attaching a valid IPIP header to it.
1168 * This avoids tunnel drivers and other mess and gives us the speed so
1169 * important for multicast video.
1170 */
e905a9ed 1171
114c7844 1172static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1da177e4 1173{
8856dfa3 1174 struct iphdr *iph;
eddc9ec5 1175 struct iphdr *old_iph = ip_hdr(skb);
8856dfa3
ACM
1176
1177 skb_push(skb, sizeof(struct iphdr));
b0e380b1 1178 skb->transport_header = skb->network_header;
8856dfa3 1179 skb_reset_network_header(skb);
eddc9ec5 1180 iph = ip_hdr(skb);
1da177e4
LT
1181
1182 iph->version = 4;
e023dd64
ACM
1183 iph->tos = old_iph->tos;
1184 iph->ttl = old_iph->ttl;
1da177e4
LT
1185 iph->frag_off = 0;
1186 iph->daddr = daddr;
1187 iph->saddr = saddr;
1188 iph->protocol = IPPROTO_IPIP;
1189 iph->ihl = 5;
1190 iph->tot_len = htons(skb->len);
adf30907 1191 ip_select_ident(iph, skb_dst(skb), NULL);
1da177e4
LT
1192 ip_send_check(iph);
1193
1da177e4
LT
1194 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1195 nf_reset(skb);
1196}
1197
1198static inline int ipmr_forward_finish(struct sk_buff *skb)
1199{
1200 struct ip_options * opt = &(IPCB(skb)->opt);
1201
adf30907 1202 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1da177e4
LT
1203
1204 if (unlikely(opt->optlen))
1205 ip_forward_options(skb);
1206
1207 return dst_output(skb);
1208}
1209
1210/*
1211 * Processing handlers for ipmr_forward
1212 */
1213
d658f8a0
PM
1214static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
1215 struct mfc_cache *c, int vifi)
1da177e4 1216{
eddc9ec5 1217 const struct iphdr *iph = ip_hdr(skb);
4feb88e5 1218 struct vif_device *vif = &net->ipv4.vif_table[vifi];
1da177e4
LT
1219 struct net_device *dev;
1220 struct rtable *rt;
1221 int encap = 0;
1222
1223 if (vif->dev == NULL)
1224 goto out_free;
1225
1226#ifdef CONFIG_IP_PIMSM
1227 if (vif->flags & VIFF_REGISTER) {
1228 vif->pkt_out++;
c354e124 1229 vif->bytes_out += skb->len;
cf3677ae
PE
1230 vif->dev->stats.tx_bytes += skb->len;
1231 vif->dev->stats.tx_packets++;
4feb88e5 1232 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
69ebbf58 1233 goto out_free;
1da177e4
LT
1234 }
1235#endif
1236
1237 if (vif->flags&VIFF_TUNNEL) {
1238 struct flowi fl = { .oif = vif->link,
1239 .nl_u = { .ip4_u =
1240 { .daddr = vif->remote,
1241 .saddr = vif->local,
1242 .tos = RT_TOS(iph->tos) } },
1243 .proto = IPPROTO_IPIP };
4feb88e5 1244 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1245 goto out_free;
1246 encap = sizeof(struct iphdr);
1247 } else {
1248 struct flowi fl = { .oif = vif->link,
1249 .nl_u = { .ip4_u =
1250 { .daddr = iph->daddr,
1251 .tos = RT_TOS(iph->tos) } },
1252 .proto = IPPROTO_IPIP };
4feb88e5 1253 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1254 goto out_free;
1255 }
1256
1257 dev = rt->u.dst.dev;
1258
1259 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1260 /* Do not fragment multicasts. Alas, IPv4 does not
1261 allow to send ICMP, so that packets will disappear
1262 to blackhole.
1263 */
1264
7c73a6fa 1265 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
1266 ip_rt_put(rt);
1267 goto out_free;
1268 }
1269
1270 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1271
1272 if (skb_cow(skb, encap)) {
e905a9ed 1273 ip_rt_put(rt);
1da177e4
LT
1274 goto out_free;
1275 }
1276
1277 vif->pkt_out++;
c354e124 1278 vif->bytes_out += skb->len;
1da177e4 1279
adf30907
ED
1280 skb_dst_drop(skb);
1281 skb_dst_set(skb, &rt->u.dst);
eddc9ec5 1282 ip_decrease_ttl(ip_hdr(skb));
1da177e4
LT
1283
1284 /* FIXME: forward and output firewalls used to be called here.
1285 * What do we do with netfilter? -- RR */
1286 if (vif->flags & VIFF_TUNNEL) {
1287 ip_encap(skb, vif->local, vif->remote);
1288 /* FIXME: extra output firewall step used to be here. --RR */
2f4c02d4
PE
1289 vif->dev->stats.tx_packets++;
1290 vif->dev->stats.tx_bytes += skb->len;
1da177e4
LT
1291 }
1292
1293 IPCB(skb)->flags |= IPSKB_FORWARDED;
1294
1295 /*
1296 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1297 * not only before forwarding, but after forwarding on all output
1298 * interfaces. It is clear, if mrouter runs a multicasting
1299 * program, it should receive packets not depending to what interface
1300 * program is joined.
1301 * If we will not make it, the program will have to join on all
1302 * interfaces. On the other hand, multihoming host (or router, but
1303 * not mrouter) cannot join to more than one interface - it will
1304 * result in receiving multiple packets.
1305 */
6e23ae2a 1306 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1da177e4
LT
1307 ipmr_forward_finish);
1308 return;
1309
1310out_free:
1311 kfree_skb(skb);
1312 return;
1313}
1314
1315static int ipmr_find_vif(struct net_device *dev)
1316{
4feb88e5 1317 struct net *net = dev_net(dev);
1da177e4 1318 int ct;
4feb88e5
BT
1319 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1320 if (net->ipv4.vif_table[ct].dev == dev)
1da177e4
LT
1321 break;
1322 }
1323 return ct;
1324}
1325
1326/* "local" means that we should preserve one skb (for local delivery) */
1327
d658f8a0
PM
1328static int ip_mr_forward(struct net *net, struct sk_buff *skb,
1329 struct mfc_cache *cache, int local)
1da177e4
LT
1330{
1331 int psend = -1;
1332 int vif, ct;
1333
1334 vif = cache->mfc_parent;
1335 cache->mfc_un.res.pkt++;
1336 cache->mfc_un.res.bytes += skb->len;
1337
1338 /*
1339 * Wrong interface: drop packet and (maybe) send PIM assert.
1340 */
4feb88e5 1341 if (net->ipv4.vif_table[vif].dev != skb->dev) {
1da177e4
LT
1342 int true_vifi;
1343
511c3f92 1344 if (skb_rtable(skb)->fl.iif == 0) {
1da177e4
LT
1345 /* It is our own packet, looped back.
1346 Very complicated situation...
1347
1348 The best workaround until routing daemons will be
1349 fixed is not to redistribute packet, if it was
1350 send through wrong interface. It means, that
1351 multicast applications WILL NOT work for
1352 (S,G), which have default multicast route pointing
1353 to wrong oif. In any case, it is not a good
1354 idea to use multicasting applications on router.
1355 */
1356 goto dont_forward;
1357 }
1358
1359 cache->mfc_un.res.wrong_if++;
1360 true_vifi = ipmr_find_vif(skb->dev);
1361
4feb88e5 1362 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1da177e4
LT
1363 /* pimsm uses asserts, when switching from RPT to SPT,
1364 so that we cannot check that packet arrived on an oif.
1365 It is bad, but otherwise we would need to move pretty
1366 large chunk of pimd to kernel. Ough... --ANK
1367 */
4feb88e5 1368 (net->ipv4.mroute_do_pim ||
6f9374a9 1369 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 1370 time_after(jiffies,
1da177e4
LT
1371 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1372 cache->mfc_un.res.last_assert = jiffies;
4feb88e5 1373 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
1374 }
1375 goto dont_forward;
1376 }
1377
4feb88e5
BT
1378 net->ipv4.vif_table[vif].pkt_in++;
1379 net->ipv4.vif_table[vif].bytes_in += skb->len;
1da177e4
LT
1380
1381 /*
1382 * Forward the frame
1383 */
1384 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
eddc9ec5 1385 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
1386 if (psend != -1) {
1387 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1388 if (skb2)
d658f8a0 1389 ipmr_queue_xmit(net, skb2, cache, psend);
1da177e4 1390 }
c354e124 1391 psend = ct;
1da177e4
LT
1392 }
1393 }
1394 if (psend != -1) {
1395 if (local) {
1396 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1397 if (skb2)
d658f8a0 1398 ipmr_queue_xmit(net, skb2, cache, psend);
1da177e4 1399 } else {
d658f8a0 1400 ipmr_queue_xmit(net, skb, cache, psend);
1da177e4
LT
1401 return 0;
1402 }
1403 }
1404
1405dont_forward:
1406 if (!local)
1407 kfree_skb(skb);
1408 return 0;
1409}
1410
1411
1412/*
1413 * Multicast packets for forwarding arrive here
1414 */
1415
1416int ip_mr_input(struct sk_buff *skb)
1417{
1418 struct mfc_cache *cache;
4feb88e5 1419 struct net *net = dev_net(skb->dev);
511c3f92 1420 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1da177e4
LT
1421
1422 /* Packet is looped back after forward, it should not be
1423 forwarded second time, but still can be delivered locally.
1424 */
1425 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1426 goto dont_forward;
1427
1428 if (!local) {
1429 if (IPCB(skb)->opt.router_alert) {
1430 if (ip_call_ra_chain(skb))
1431 return 0;
eddc9ec5 1432 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1da177e4
LT
1433 /* IGMPv1 (and broken IGMPv2 implementations sort of
1434 Cisco IOS <= 11.2(8)) do not put router alert
1435 option to IGMP packets destined to routable
1436 groups. It is very bad, because it means
1437 that we can forward NO IGMP messages.
1438 */
1439 read_lock(&mrt_lock);
4feb88e5 1440 if (net->ipv4.mroute_sk) {
2715bcf9 1441 nf_reset(skb);
4feb88e5 1442 raw_rcv(net->ipv4.mroute_sk, skb);
1da177e4
LT
1443 read_unlock(&mrt_lock);
1444 return 0;
1445 }
1446 read_unlock(&mrt_lock);
1447 }
1448 }
1449
1450 read_lock(&mrt_lock);
4feb88e5 1451 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1da177e4
LT
1452
1453 /*
1454 * No usable cache entry
1455 */
c354e124 1456 if (cache == NULL) {
1da177e4
LT
1457 int vif;
1458
1459 if (local) {
1460 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1461 ip_local_deliver(skb);
1462 if (skb2 == NULL) {
1463 read_unlock(&mrt_lock);
1464 return -ENOBUFS;
1465 }
1466 skb = skb2;
1467 }
1468
1469 vif = ipmr_find_vif(skb->dev);
1470 if (vif >= 0) {
4feb88e5 1471 int err = ipmr_cache_unresolved(net, vif, skb);
1da177e4
LT
1472 read_unlock(&mrt_lock);
1473
1474 return err;
1475 }
1476 read_unlock(&mrt_lock);
1477 kfree_skb(skb);
1478 return -ENODEV;
1479 }
1480
d658f8a0 1481 ip_mr_forward(net, skb, cache, local);
1da177e4
LT
1482
1483 read_unlock(&mrt_lock);
1484
1485 if (local)
1486 return ip_local_deliver(skb);
1487
1488 return 0;
1489
1490dont_forward:
1491 if (local)
1492 return ip_local_deliver(skb);
1493 kfree_skb(skb);
1494 return 0;
1495}
1496
b1879204
IJ
1497#ifdef CONFIG_IP_PIMSM
1498static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1da177e4 1499{
b1879204
IJ
1500 struct net_device *reg_dev = NULL;
1501 struct iphdr *encap;
4feb88e5 1502 struct net *net = dev_net(skb->dev);
1da177e4 1503
b1879204 1504 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1da177e4
LT
1505 /*
1506 Check that:
1507 a. packet is really destinted to a multicast group
1508 b. packet is not a NULL-REGISTER
1509 c. packet is not truncated
1510 */
f97c1e0c 1511 if (!ipv4_is_multicast(encap->daddr) ||
1da177e4 1512 encap->tot_len == 0 ||
b1879204
IJ
1513 ntohs(encap->tot_len) + pimlen > skb->len)
1514 return 1;
1da177e4
LT
1515
1516 read_lock(&mrt_lock);
4feb88e5
BT
1517 if (net->ipv4.mroute_reg_vif_num >= 0)
1518 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1da177e4
LT
1519 if (reg_dev)
1520 dev_hold(reg_dev);
1521 read_unlock(&mrt_lock);
1522
e905a9ed 1523 if (reg_dev == NULL)
b1879204 1524 return 1;
1da177e4 1525
b0e380b1 1526 skb->mac_header = skb->network_header;
1da177e4 1527 skb_pull(skb, (u8*)encap - skb->data);
31c7711b 1528 skb_reset_network_header(skb);
1da177e4 1529 skb->dev = reg_dev;
1da177e4
LT
1530 skb->protocol = htons(ETH_P_IP);
1531 skb->ip_summed = 0;
1532 skb->pkt_type = PACKET_HOST;
adf30907 1533 skb_dst_drop(skb);
cf3677ae
PE
1534 reg_dev->stats.rx_bytes += skb->len;
1535 reg_dev->stats.rx_packets++;
1da177e4
LT
1536 nf_reset(skb);
1537 netif_rx(skb);
1538 dev_put(reg_dev);
b1879204 1539
1da177e4 1540 return 0;
b1879204
IJ
1541}
1542#endif
1543
1544#ifdef CONFIG_IP_PIMSM_V1
1545/*
1546 * Handle IGMP messages of PIMv1
1547 */
1548
1549int pim_rcv_v1(struct sk_buff * skb)
1550{
1551 struct igmphdr *pim;
4feb88e5 1552 struct net *net = dev_net(skb->dev);
b1879204
IJ
1553
1554 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1555 goto drop;
1556
1557 pim = igmp_hdr(skb);
1558
4feb88e5 1559 if (!net->ipv4.mroute_do_pim ||
b1879204
IJ
1560 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1561 goto drop;
1562
1563 if (__pim_rcv(skb, sizeof(*pim))) {
1564drop:
1565 kfree_skb(skb);
1566 }
1da177e4
LT
1567 return 0;
1568}
1569#endif
1570
1571#ifdef CONFIG_IP_PIMSM_V2
1572static int pim_rcv(struct sk_buff * skb)
1573{
1574 struct pimreghdr *pim;
1da177e4 1575
b1879204 1576 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1da177e4
LT
1577 goto drop;
1578
9c70220b 1579 pim = (struct pimreghdr *)skb_transport_header(skb);
e905a9ed 1580 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1da177e4 1581 (pim->flags&PIM_NULL_REGISTER) ||
e905a9ed 1582 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
d3bc23e7 1583 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1da177e4
LT
1584 goto drop;
1585
b1879204
IJ
1586 if (__pim_rcv(skb, sizeof(*pim))) {
1587drop:
1588 kfree_skb(skb);
1589 }
1da177e4
LT
1590 return 0;
1591}
1592#endif
1593
1594static int
d658f8a0
PM
1595ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
1596 struct rtmsg *rtm)
1da177e4
LT
1597{
1598 int ct;
1599 struct rtnexthop *nhp;
27a884dc 1600 u8 *b = skb_tail_pointer(skb);
1da177e4
LT
1601 struct rtattr *mp_head;
1602
7438189b
ND
1603 /* If cache is unresolved, don't try to parse IIF and OIF */
1604 if (c->mfc_parent > MAXVIFS)
1605 return -ENOENT;
1606
1607 if (VIF_EXISTS(net, c->mfc_parent))
1608 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
1da177e4 1609
c354e124 1610 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1da177e4
LT
1611
1612 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
7438189b 1613 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1da177e4
LT
1614 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1615 goto rtattr_failure;
c354e124 1616 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1da177e4
LT
1617 nhp->rtnh_flags = 0;
1618 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
4feb88e5 1619 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1da177e4
LT
1620 nhp->rtnh_len = sizeof(*nhp);
1621 }
1622 }
1623 mp_head->rta_type = RTA_MULTIPATH;
27a884dc 1624 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1da177e4
LT
1625 rtm->rtm_type = RTN_MULTICAST;
1626 return 1;
1627
1628rtattr_failure:
dc5fc579 1629 nlmsg_trim(skb, b);
1da177e4
LT
1630 return -EMSGSIZE;
1631}
1632
4feb88e5
BT
1633int ipmr_get_route(struct net *net,
1634 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1da177e4
LT
1635{
1636 int err;
1637 struct mfc_cache *cache;
511c3f92 1638 struct rtable *rt = skb_rtable(skb);
1da177e4
LT
1639
1640 read_lock(&mrt_lock);
4feb88e5 1641 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1da177e4 1642
c354e124 1643 if (cache == NULL) {
72287490 1644 struct sk_buff *skb2;
eddc9ec5 1645 struct iphdr *iph;
1da177e4
LT
1646 struct net_device *dev;
1647 int vif;
1648
1649 if (nowait) {
1650 read_unlock(&mrt_lock);
1651 return -EAGAIN;
1652 }
1653
1654 dev = skb->dev;
1655 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1656 read_unlock(&mrt_lock);
1657 return -ENODEV;
1658 }
72287490
AK
1659 skb2 = skb_clone(skb, GFP_ATOMIC);
1660 if (!skb2) {
1661 read_unlock(&mrt_lock);
1662 return -ENOMEM;
1663 }
1664
e2d1bca7
ACM
1665 skb_push(skb2, sizeof(struct iphdr));
1666 skb_reset_network_header(skb2);
eddc9ec5
ACM
1667 iph = ip_hdr(skb2);
1668 iph->ihl = sizeof(struct iphdr) >> 2;
1669 iph->saddr = rt->rt_src;
1670 iph->daddr = rt->rt_dst;
1671 iph->version = 0;
4feb88e5 1672 err = ipmr_cache_unresolved(net, vif, skb2);
1da177e4
LT
1673 read_unlock(&mrt_lock);
1674 return err;
1675 }
1676
1677 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1678 cache->mfc_flags |= MFC_NOTIFY;
d658f8a0 1679 err = ipmr_fill_mroute(net, skb, cache, rtm);
1da177e4
LT
1680 read_unlock(&mrt_lock);
1681 return err;
1682}
1683
e905a9ed 1684#ifdef CONFIG_PROC_FS
1da177e4
LT
1685/*
1686 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1687 */
1688struct ipmr_vif_iter {
f6bb4514 1689 struct seq_net_private p;
1da177e4
LT
1690 int ct;
1691};
1692
f6bb4514
BT
1693static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1694 struct ipmr_vif_iter *iter,
1da177e4
LT
1695 loff_t pos)
1696{
f6bb4514
BT
1697 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1698 if (!VIF_EXISTS(net, iter->ct))
1da177e4 1699 continue;
e905a9ed 1700 if (pos-- == 0)
f6bb4514 1701 return &net->ipv4.vif_table[iter->ct];
1da177e4
LT
1702 }
1703 return NULL;
1704}
1705
1706static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
ba93ef74 1707 __acquires(mrt_lock)
1da177e4 1708{
f6bb4514
BT
1709 struct net *net = seq_file_net(seq);
1710
1da177e4 1711 read_lock(&mrt_lock);
f6bb4514 1712 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
1713 : SEQ_START_TOKEN;
1714}
1715
1716static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1717{
1718 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 1719 struct net *net = seq_file_net(seq);
1da177e4
LT
1720
1721 ++*pos;
1722 if (v == SEQ_START_TOKEN)
f6bb4514 1723 return ipmr_vif_seq_idx(net, iter, 0);
e905a9ed 1724
f6bb4514
BT
1725 while (++iter->ct < net->ipv4.maxvif) {
1726 if (!VIF_EXISTS(net, iter->ct))
1da177e4 1727 continue;
f6bb4514 1728 return &net->ipv4.vif_table[iter->ct];
1da177e4
LT
1729 }
1730 return NULL;
1731}
1732
1733static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
ba93ef74 1734 __releases(mrt_lock)
1da177e4
LT
1735{
1736 read_unlock(&mrt_lock);
1737}
1738
1739static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1740{
f6bb4514
BT
1741 struct net *net = seq_file_net(seq);
1742
1da177e4 1743 if (v == SEQ_START_TOKEN) {
e905a9ed 1744 seq_puts(seq,
1da177e4
LT
1745 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1746 } else {
1747 const struct vif_device *vif = v;
1748 const char *name = vif->dev ? vif->dev->name : "none";
1749
1750 seq_printf(seq,
1751 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
f6bb4514 1752 vif - net->ipv4.vif_table,
e905a9ed 1753 name, vif->bytes_in, vif->pkt_in,
1da177e4
LT
1754 vif->bytes_out, vif->pkt_out,
1755 vif->flags, vif->local, vif->remote);
1756 }
1757 return 0;
1758}
1759
f690808e 1760static const struct seq_operations ipmr_vif_seq_ops = {
1da177e4
LT
1761 .start = ipmr_vif_seq_start,
1762 .next = ipmr_vif_seq_next,
1763 .stop = ipmr_vif_seq_stop,
1764 .show = ipmr_vif_seq_show,
1765};
1766
1767static int ipmr_vif_open(struct inode *inode, struct file *file)
1768{
f6bb4514
BT
1769 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1770 sizeof(struct ipmr_vif_iter));
1da177e4
LT
1771}
1772
9a32144e 1773static const struct file_operations ipmr_vif_fops = {
1da177e4
LT
1774 .owner = THIS_MODULE,
1775 .open = ipmr_vif_open,
1776 .read = seq_read,
1777 .llseek = seq_lseek,
f6bb4514 1778 .release = seq_release_net,
1da177e4
LT
1779};
1780
1781struct ipmr_mfc_iter {
f6bb4514 1782 struct seq_net_private p;
862465f2 1783 struct list_head *cache;
1da177e4
LT
1784 int ct;
1785};
1786
1787
f6bb4514
BT
1788static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1789 struct ipmr_mfc_iter *it, loff_t pos)
1da177e4
LT
1790{
1791 struct mfc_cache *mfc;
1792
1da177e4 1793 read_lock(&mrt_lock);
862465f2
PM
1794 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
1795 it->cache = &net->ipv4.mfc_cache_array[it->ct];
1796 list_for_each_entry(mfc, it->cache, list)
e905a9ed 1797 if (pos-- == 0)
1da177e4 1798 return mfc;
862465f2 1799 }
1da177e4
LT
1800 read_unlock(&mrt_lock);
1801
1da177e4 1802 spin_lock_bh(&mfc_unres_lock);
862465f2
PM
1803 it->cache = &net->ipv4.mfc_unres_queue;
1804 list_for_each_entry(mfc, it->cache, list)
e258beb2 1805 if (pos-- == 0)
1da177e4
LT
1806 return mfc;
1807 spin_unlock_bh(&mfc_unres_lock);
1808
1809 it->cache = NULL;
1810 return NULL;
1811}
1812
1813
1814static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1815{
1816 struct ipmr_mfc_iter *it = seq->private;
f6bb4514
BT
1817 struct net *net = seq_file_net(seq);
1818
1da177e4
LT
1819 it->cache = NULL;
1820 it->ct = 0;
f6bb4514 1821 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
1822 : SEQ_START_TOKEN;
1823}
1824
1825static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1826{
1827 struct mfc_cache *mfc = v;
1828 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 1829 struct net *net = seq_file_net(seq);
1da177e4
LT
1830
1831 ++*pos;
1832
1833 if (v == SEQ_START_TOKEN)
f6bb4514 1834 return ipmr_mfc_seq_idx(net, seq->private, 0);
1da177e4 1835
862465f2
PM
1836 if (mfc->list.next != it->cache)
1837 return list_entry(mfc->list.next, struct mfc_cache, list);
e905a9ed 1838
e258beb2 1839 if (it->cache == &net->ipv4.mfc_unres_queue)
1da177e4
LT
1840 goto end_of_list;
1841
862465f2 1842 BUG_ON(it->cache != &net->ipv4.mfc_cache_array[it->ct]);
1da177e4
LT
1843
1844 while (++it->ct < MFC_LINES) {
862465f2
PM
1845 it->cache = &net->ipv4.mfc_cache_array[it->ct];
1846 if (list_empty(it->cache))
1847 continue;
1848 return list_first_entry(it->cache, struct mfc_cache, list);
1da177e4
LT
1849 }
1850
1851 /* exhausted cache_array, show unresolved */
1852 read_unlock(&mrt_lock);
e258beb2 1853 it->cache = &net->ipv4.mfc_unres_queue;
1da177e4 1854 it->ct = 0;
e905a9ed 1855
1da177e4 1856 spin_lock_bh(&mfc_unres_lock);
862465f2
PM
1857 if (!list_empty(it->cache))
1858 return list_first_entry(it->cache, struct mfc_cache, list);
1da177e4
LT
1859
1860 end_of_list:
1861 spin_unlock_bh(&mfc_unres_lock);
1862 it->cache = NULL;
1863
1864 return NULL;
1865}
1866
1867static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1868{
1869 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 1870 struct net *net = seq_file_net(seq);
1da177e4 1871
e258beb2 1872 if (it->cache == &net->ipv4.mfc_unres_queue)
1da177e4 1873 spin_unlock_bh(&mfc_unres_lock);
862465f2 1874 else if (it->cache == &net->ipv4.mfc_cache_array[it->ct])
1da177e4
LT
1875 read_unlock(&mrt_lock);
1876}
1877
1878static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1879{
1880 int n;
f6bb4514 1881 struct net *net = seq_file_net(seq);
1da177e4
LT
1882
1883 if (v == SEQ_START_TOKEN) {
e905a9ed 1884 seq_puts(seq,
1da177e4
LT
1885 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1886 } else {
1887 const struct mfc_cache *mfc = v;
1888 const struct ipmr_mfc_iter *it = seq->private;
e905a9ed 1889
999890b2 1890 seq_printf(seq, "%08lX %08lX %-3hd",
1da177e4
LT
1891 (unsigned long) mfc->mfc_mcastgrp,
1892 (unsigned long) mfc->mfc_origin,
1ea472e2 1893 mfc->mfc_parent);
1da177e4 1894
e258beb2 1895 if (it->cache != &net->ipv4.mfc_unres_queue) {
1ea472e2
BT
1896 seq_printf(seq, " %8lu %8lu %8lu",
1897 mfc->mfc_un.res.pkt,
1898 mfc->mfc_un.res.bytes,
1899 mfc->mfc_un.res.wrong_if);
132adf54
SH
1900 for (n = mfc->mfc_un.res.minvif;
1901 n < mfc->mfc_un.res.maxvif; n++ ) {
f6bb4514 1902 if (VIF_EXISTS(net, n) &&
cf958ae3
BT
1903 mfc->mfc_un.res.ttls[n] < 255)
1904 seq_printf(seq,
e905a9ed 1905 " %2d:%-3d",
1da177e4
LT
1906 n, mfc->mfc_un.res.ttls[n]);
1907 }
1ea472e2
BT
1908 } else {
1909 /* unresolved mfc_caches don't contain
1910 * pkt, bytes and wrong_if values
1911 */
1912 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1da177e4
LT
1913 }
1914 seq_putc(seq, '\n');
1915 }
1916 return 0;
1917}
1918
f690808e 1919static const struct seq_operations ipmr_mfc_seq_ops = {
1da177e4
LT
1920 .start = ipmr_mfc_seq_start,
1921 .next = ipmr_mfc_seq_next,
1922 .stop = ipmr_mfc_seq_stop,
1923 .show = ipmr_mfc_seq_show,
1924};
1925
1926static int ipmr_mfc_open(struct inode *inode, struct file *file)
1927{
f6bb4514
BT
1928 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1929 sizeof(struct ipmr_mfc_iter));
1da177e4
LT
1930}
1931
9a32144e 1932static const struct file_operations ipmr_mfc_fops = {
1da177e4
LT
1933 .owner = THIS_MODULE,
1934 .open = ipmr_mfc_open,
1935 .read = seq_read,
1936 .llseek = seq_lseek,
f6bb4514 1937 .release = seq_release_net,
1da177e4 1938};
e905a9ed 1939#endif
1da177e4
LT
1940
1941#ifdef CONFIG_IP_PIMSM_V2
32613090 1942static const struct net_protocol pim_protocol = {
1da177e4 1943 .handler = pim_rcv,
403dbb97 1944 .netns_ok = 1,
1da177e4
LT
1945};
1946#endif
1947
1948
1949/*
1950 * Setup for IP multicast routing
1951 */
cf958ae3
BT
1952static int __net_init ipmr_net_init(struct net *net)
1953{
862465f2 1954 unsigned int i;
cf958ae3
BT
1955 int err = 0;
1956
1957 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1958 GFP_KERNEL);
1959 if (!net->ipv4.vif_table) {
1960 err = -ENOMEM;
1961 goto fail;
1962 }
2bb8b26c
BT
1963
1964 /* Forwarding cache */
1965 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
862465f2 1966 sizeof(struct list_head),
2bb8b26c
BT
1967 GFP_KERNEL);
1968 if (!net->ipv4.mfc_cache_array) {
1969 err = -ENOMEM;
1970 goto fail_mfc_cache;
1971 }
6c5143db 1972
862465f2
PM
1973 for (i = 0; i < MFC_LINES; i++)
1974 INIT_LIST_HEAD(&net->ipv4.mfc_cache_array[i]);
1975
1976 INIT_LIST_HEAD(&net->ipv4.mfc_unres_queue);
1977
e258beb2
PM
1978 setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
1979 (unsigned long)net);
1980
6c5143db
BT
1981#ifdef CONFIG_IP_PIMSM
1982 net->ipv4.mroute_reg_vif_num = -1;
1983#endif
f6bb4514
BT
1984
1985#ifdef CONFIG_PROC_FS
1986 err = -ENOMEM;
1987 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1988 goto proc_vif_fail;
1989 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1990 goto proc_cache_fail;
1991#endif
2bb8b26c
BT
1992 return 0;
1993
f6bb4514
BT
1994#ifdef CONFIG_PROC_FS
1995proc_cache_fail:
1996 proc_net_remove(net, "ip_mr_vif");
1997proc_vif_fail:
1998 kfree(net->ipv4.mfc_cache_array);
1999#endif
2bb8b26c
BT
2000fail_mfc_cache:
2001 kfree(net->ipv4.vif_table);
cf958ae3
BT
2002fail:
2003 return err;
2004}
2005
2006static void __net_exit ipmr_net_exit(struct net *net)
2007{
f6bb4514
BT
2008#ifdef CONFIG_PROC_FS
2009 proc_net_remove(net, "ip_mr_cache");
2010 proc_net_remove(net, "ip_mr_vif");
2011#endif
2bb8b26c 2012 kfree(net->ipv4.mfc_cache_array);
cf958ae3
BT
2013 kfree(net->ipv4.vif_table);
2014}
2015
2016static struct pernet_operations ipmr_net_ops = {
2017 .init = ipmr_net_init,
2018 .exit = ipmr_net_exit,
2019};
e905a9ed 2020
03d2f897 2021int __init ip_mr_init(void)
1da177e4 2022{
03d2f897
WC
2023 int err;
2024
1da177e4
LT
2025 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2026 sizeof(struct mfc_cache),
e5d679f3 2027 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
20c2df83 2028 NULL);
03d2f897
WC
2029 if (!mrt_cachep)
2030 return -ENOMEM;
2031
cf958ae3
BT
2032 err = register_pernet_subsys(&ipmr_net_ops);
2033 if (err)
2034 goto reg_pernet_fail;
2035
03d2f897
WC
2036 err = register_netdevice_notifier(&ip_mr_notifier);
2037 if (err)
2038 goto reg_notif_fail;
403dbb97
TG
2039#ifdef CONFIG_IP_PIMSM_V2
2040 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2041 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2042 err = -EAGAIN;
2043 goto add_proto_fail;
2044 }
2045#endif
03d2f897 2046 return 0;
f6bb4514 2047
403dbb97
TG
2048#ifdef CONFIG_IP_PIMSM_V2
2049add_proto_fail:
2050 unregister_netdevice_notifier(&ip_mr_notifier);
2051#endif
c3e38896 2052reg_notif_fail:
cf958ae3
BT
2053 unregister_pernet_subsys(&ipmr_net_ops);
2054reg_pernet_fail:
c3e38896 2055 kmem_cache_destroy(mrt_cachep);
03d2f897 2056 return err;
1da177e4 2057}