/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct net *net, struct sk_buff *skb,
			 struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb,
			    struct mfc_cache *c, struct rtmsg *rtm);

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

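/*
 * Note: both tunnel helpers drive the ipip driver's SIOCDELTUNNEL and
 * SIOCADDTUNNEL ioctls from kernel context.  The get_fs()/set_fs(KERNEL_DS)
 * pair temporarily widens the address limit so the driver's
 * copy_from_user() on ifr_ifru.ifru_data accepts a kernel pointer, which
 * is also why the __force __user cast is needed on &p above.
 */
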
static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

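/*
 * The "pimreg" device created below is the kernel end of the PIM register
 * VIF: anything the daemon transmits on it is bounced back up through
 * reg_vif_xmit() -> ipmr_cache_report(IGMPMSG_WHOLEPKT).  The MTU chosen
 * in reg_vif_setup() reserves sizeof(struct iphdr) + 8 bytes, presumably
 * for the outer IP header plus the 8 bytes of PIM register encapsulation
 * (4-byte PIM header and 4-byte register head) added around the packet.
 */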
static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct net *net = (struct net *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (net->ipv4.mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &net->ipv4.mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(net, c);
	}

	if (net->ipv4.mfc_unres_queue != NULL)
		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

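/*
 * Hash lookup of a resolved (S,G) entry.  Per the locking scheme described
 * at the top of this file, callers must hold mrt_lock at least for reading;
 * the hash chains themselves are only modified in process context under
 * the write lock, so the read side stays cheap on the data path.
 */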
static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
			       struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

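/*
 * On the wire to the daemon, the upcall built above reuses the copied IP
 * header as a struct igmpmsg: im_msgtype, im_mbz and im_vif overlay header
 * fields, which is why ip_hdr(skb)->protocol = 0 and msg->im_mbz = 0 both
 * matter; the daemon checks the must-be-zero byte to tell kernel upcalls
 * apart from real IGMP traffic arriving on the same raw socket.
 */
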
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = net->ipv4.mfc_unres_queue; c; c = c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = net->ipv4.mfc_unres_queue;
		net->ipv4.mfc_unres_queue = c;

		mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

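/*
 * Resource limits on the unresolved path, as set above: at most 10
 * pending entries per namespace, no more than four queued skbs per entry,
 * and each entry expires 10 seconds after creation (see
 * ipmr_cache_alloc_unres() and ipmr_expire_process()).
 */
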
/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &net->ipv4.mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (net->ipv4.mfc_unres_queue == NULL)
		del_timer(&net->ipv4.ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(net, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;
	LIST_HEAD(list);

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &net->ipv4.mfc_unres_queue;
		while ((c = *cp) != NULL) {
			*cp = c->next;
			ipmr_destroy_unres(net, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

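/*
 * Typical use from a routing daemon (sketch; the field values shown are
 * purely illustrative):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 * Only one MRT_INIT socket may exist per namespace (net->ipv4.mroute_sk),
 * and it must be a raw IGMP socket, as enforced above.
 */
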
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1, &list);
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

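/*
 * After ip_encap() the skb carries an outer IPv4 header with protocol
 * IPPROTO_IPIP in front of the untouched multicast packet; TOS and TTL are
 * copied from the inner header and the outer checksum is computed right
 * away, so no tunnel netdevice is involved on this transmit path.
 */
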
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
			    struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

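/*
 * Note the two route lookups above: for VIFF_TUNNEL the route is resolved
 * toward the tunnel endpoint (vif->remote), since the packet will be
 * IPIP-encapsulated by ip_encap(), while for plain VIFs it is resolved
 * toward the multicast destination itself.  In both cases a set DF bit on
 * an oversized packet makes it vanish silently, as the comment inside the
 * function notes.
 */
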
static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

1335/* "local" means that we should preserve one skb (for local delivery) */
1336
d658f8a0
PM
1337static int ip_mr_forward(struct net *net, struct sk_buff *skb,
1338 struct mfc_cache *cache, int local)
1da177e4
LT
1339{
1340 int psend = -1;
1341 int vif, ct;
1342
1343 vif = cache->mfc_parent;
1344 cache->mfc_un.res.pkt++;
1345 cache->mfc_un.res.bytes += skb->len;
1346
1347 /*
1348 * Wrong interface: drop packet and (maybe) send PIM assert.
1349 */
4feb88e5 1350 if (net->ipv4.vif_table[vif].dev != skb->dev) {
1da177e4
LT
1351 int true_vifi;
1352
511c3f92 1353 if (skb_rtable(skb)->fl.iif == 0) {
1da177e4
LT
1354 /* It is our own packet, looped back.
1355 Very complicated situation...
1356
1357 The best workaround until routing daemons will be
1358 fixed is not to redistribute packet, if it was
1359 send through wrong interface. It means, that
1360 multicast applications WILL NOT work for
1361 (S,G), which have default multicast route pointing
1362 to wrong oif. In any case, it is not a good
1363 idea to use multicasting applications on router.
1364 */
1365 goto dont_forward;
1366 }
1367
1368 cache->mfc_un.res.wrong_if++;
1369 true_vifi = ipmr_find_vif(skb->dev);
1370
4feb88e5 1371 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1da177e4
LT
1372 /* pimsm uses asserts, when switching from RPT to SPT,
1373 so that we cannot check that packet arrived on an oif.
1374 It is bad, but otherwise we would need to move pretty
1375 large chunk of pimd to kernel. Ough... --ANK
1376 */
4feb88e5 1377 (net->ipv4.mroute_do_pim ||
6f9374a9 1378 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 1379 time_after(jiffies,
1da177e4
LT
1380 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1381 cache->mfc_un.res.last_assert = jiffies;
4feb88e5 1382 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
1383 }
1384 goto dont_forward;
1385 }
1386
4feb88e5
BT
1387 net->ipv4.vif_table[vif].pkt_in++;
1388 net->ipv4.vif_table[vif].bytes_in += skb->len;
1da177e4
LT
1389
1390 /*
1391 * Forward the frame
1392 */
1393 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
eddc9ec5 1394 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
1395 if (psend != -1) {
1396 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1397 if (skb2)
d658f8a0 1398 ipmr_queue_xmit(net, skb2, cache, psend);
1da177e4 1399 }
c354e124 1400 psend = ct;
1da177e4
LT
1401 }
1402 }
1403 if (psend != -1) {
1404 if (local) {
1405 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1406 if (skb2)
d658f8a0 1407 ipmr_queue_xmit(net, skb2, cache, psend);
1da177e4 1408 } else {
d658f8a0 1409 ipmr_queue_xmit(net, skb, cache, psend);
1da177e4
LT
1410 return 0;
1411 }
1412 }
1413
1414dont_forward:
1415 if (!local)
1416 kfree_skb(skb);
1417 return 0;
1418}
1419
1420
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (net->ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(net->ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

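/*
 * ip_mr_input() is wired up as the dst input handler for multicast routes
 * when CONFIG_IP_MROUTE is enabled (in this tree, via ip_route_input_mc()
 * in net/ipv4/route.c), so every multicast datagram that may need
 * forwarding funnels through here.
 */
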
#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
		 struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent > MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(net, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(net, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

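/*
 * The iph->version = 0 trick above marks the cloned skb as a pending
 * netlink request rather than a real packet: ipmr_cache_resolve() and
 * ipmr_destroy_unres() key on version == 0 to decide whether to answer
 * via rtnl_unicast() or to forward/drop the queued skb.
 */
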
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &net->ipv4.mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &net->ipv4.mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &net->ipv4.mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = net->ipv4.mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &net->ipv4.mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &net->ipv4.mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif

/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

	setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)net);

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}