/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *				:	overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;	/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
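
/*
 * Note on the two helpers above: ndo_do_ioctl() expects a user-space
 * pointer in ifr_data, so the kernel briefly widens the addressable
 * range with set_fs(KERNEL_DS) in order to hand the ipip driver a
 * kernel-resident struct ip_tunnel_parm. This is the historical idiom
 * for driving SIOCADDTUNNEL/SIOCDELTUNNEL on "tunl0" from inside the
 * kernel; the resulting tunnel devices are named "dvmrp%d" after the
 * requesting vif index.
 */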

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;
	struct net *net = mfc_net(c);

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc_net(cache);

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
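
/*
 * ip_mr_forward() only scans vifs in the [minvif, maxvif) window computed
 * above. Entries left at 255 (the default for a requested ttl of 0 or
 * 255) can never be exceeded by a packet's TTL, so those vifs never
 * forward for this cache entry.
 */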

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags & VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}
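
/*
 * Resolved entries are keyed by the (origin, mcastgrp) pair: MFC_HASH()
 * selects one of the MFC_LINES buckets of mfc_cache_array and the chain
 * is then scanned linearly. Holding mrt_lock for reading suffices here,
 * since (per the locking comment at the top of this file) the hash is
 * only rewritten in process context.
 */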

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
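
/*
 * What the daemon actually reads from the mroute socket is a struct
 * igmpmsg as declared in <linux/mroute.h>, laid over the start of the
 * packet so that im_src/im_dst alias the IP header's saddr/daddr:
 *
 *	struct igmpmsg {
 *		__u32		unused1, unused2;
 *		unsigned char	im_msgtype;	-- IGMPMSG_NOCACHE etc.
 *		unsigned char	im_mbz;		-- must be zero
 *		unsigned char	im_vif;		-- vif the packet arrived on
 *		unsigned char	unused3;
 *		struct in_addr	im_src, im_dst;
 *	};
 *
 * which is why the code below only needs to mangle a few header bytes.
 */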

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

		/*
		 *	Copy the IP header
		 */

		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));

		/*
		 *	Add our header
		 */

		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type =
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (net_eq(mfc_net(c), net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;
	LIST_HEAD(list);

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags & VIFF_STATIC))
			vif_delete(net, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
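
/*
 * Illustrative user-space sequence (a sketch, not part of this file): a
 * daemon in the mrouted mould claims the kernel with MRT_INIT on a raw
 * IGMP socket, then programs vifs and (S,G) entries via setsockopt():
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc = { .vifc_vifi = 0, ... };
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	... read(s, ...) then receives struct igmpmsg upcalls ...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 *
 * Exactly one such socket may exist per namespace; a second MRT_INIT
 * fails with EADDRINUSE.
 */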

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
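
/*
 * Counter queries go through the same raw socket. A sketch of reading
 * per-flow statistics with SIOCGETSGCNT (struct sioc_sg_req comes from
 * <linux/mroute.h>; the addresses here are placeholders):
 *
 *	struct sioc_sg_req sgreq;
 *	memset(&sgreq, 0, sizeof(sgreq));
 *	inet_aton("10.0.0.1", &sgreq.src);
 *	inet_aton("239.1.2.3", &sgreq.grp);
 *	if (ioctl(s, SIOCGETSGCNT, &sgreq) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sgreq.pktcnt, sgreq.bytecnt, sgreq.wrong_if);
 */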


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1, &list);
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}
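
/*
 * After ip_encap() the frame on the wire is simply
 *
 *	| outer iphdr (protocol = IPPROTO_IPIP) | original IP packet |
 *
 * with the outer TOS and TTL copied from the inner header, which is all
 * a DVMRP tunnel peer needs in order to decapsulate it.
 */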

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct net *net = mfc_net(c);
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags & VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len + encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets regardless of the interface
	 * the program has joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif - 1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   sent through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
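
/*
 * The forwarding rule above: a vif with threshold ttls[ct] only gets a
 * copy when the arriving packet's TTL is strictly greater, which is how
 * administrative TTL scoping works. The last eligible vif reuses the
 * original skb instead of a clone (when no local delivery is needed),
 * so only N-1 clones are made for N output interfaces.
 */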


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags & IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (net->ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(net->ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent > MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(net, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
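
/*
 * Reading /proc/net/ip_mr_vif gives the header line above followed by
 * one line per configured vif, e.g. (illustrative values only):
 *
 *	 0 eth0       123456     789   654321     987 00000 C0A80101 00000000
 *
 * Flags is the VIFF_* bitmask in hex; Local and Remote are raw hex
 * addresses.
 */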

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	= pim_rcv,
	.netns_ok	= 1,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}