/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

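/* Create a new DVMRP tunnel: ask the generic "tunl0" device to allocate
 * an IPIP tunnel named "dvmrp<vifi>" via its SIOCADDTUNNEL ioctl, then
 * bring the resulting device up with reverse-path filtering disabled.
 * Returns the held net_device, or NULL on failure.
 */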
static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

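/* Transmit hook of the "pimreg" device: every packet queued to the
 * register VIF is reported to the user-space daemon as an
 * IGMPMSG_WHOLEPKT upcall and then freed; nothing goes on the wire.
 */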
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

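/* Allocate and register the per-namespace "pimreg" device used as the
 * PIM register VIF, enable it, and return it held; NULL on failure.
 */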
static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

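/* Free an MFC cache entry, dropping the namespace reference it holds. */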
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;
	struct net *net = mfc_net(c);

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct net *net = (struct net *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (net->ipv4.mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &net->ipv4.mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (net->ipv4.mfc_unres_queue != NULL)
		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc_net(cache);

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

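/* Allocate a VIF slot for userspace: create or look up the underlying
 * device according to vifc_flags (register VIF, DVMRP tunnel, ifindex
 * or local address), switch it to allmulti and fill in the vif_device.
 */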
static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

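/* Look up a resolved (origin, group) entry in the MFC hash; called with
 * mrt_lock held.
 */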
static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = net->ipv4.mfc_unres_queue; c; c = c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = net->ipv4.mfc_unres_queue;
		net->ipv4.mfc_unres_queue = c;

		mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

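/* Add or update an MFC entry. If a matching unresolved entry was
 * queued, resolve it: replay its pending skbs and free it.
 */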
static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &net->ipv4.mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (net->ipv4.mfc_unres_queue == NULL)
		del_timer(&net->ipv4.ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;
	LIST_HEAD(list);

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &net->ipv4.mfc_unres_queue;
		while ((c = *cp) != NULL) {
			*cp = c->next;
			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

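/* Destructor for the mroute socket: runs when the daemon closes it and
 * tears down all non-static VIFs and cache entries.
 */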
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


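/* Netdevice notifier: when a device unregisters, delete every VIF that
 * points at it so the table never holds a stale reference.
 */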
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1, &list);
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

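/* Final step of NF_INET_FORWARD for multicast: count the forwarded
 * datagram, process any IP options and hand the skb to dst_output().
 */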
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct net *net = mfc_net(c);
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. It is clear that if the mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program has joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

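/* Map a device back to its VIF index; returns -1 if the device is not
 * a VIF.
 */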
static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to run multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. This is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (net->ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(net->ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
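/* Common PIMv1/v2 register handling: validate the encapsulated packet
 * and, if a register VIF exists, re-inject the inner packet on it.
 * Returns 0 when the skb was consumed, 1 when the caller must free it.
 */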
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

1553
1554#ifdef CONFIG_IP_PIMSM_V1
1555/*
1556 * Handle IGMP messages of PIMv1
1557 */
1558
1559int pim_rcv_v1(struct sk_buff * skb)
1560{
1561 struct igmphdr *pim;
4feb88e5 1562 struct net *net = dev_net(skb->dev);
b1879204
IJ
1563
1564 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1565 goto drop;
1566
1567 pim = igmp_hdr(skb);
1568
4feb88e5 1569 if (!net->ipv4.mroute_do_pim ||
b1879204
IJ
1570 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1571 goto drop;
1572
1573 if (__pim_rcv(skb, sizeof(*pim))) {
1574drop:
1575 kfree_skb(skb);
1576 }
1da177e4
LT
1577 return 0;
1578}
1579#endif
1580
#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

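/* Fill an rtnetlink reply with the IIF and the RTA_MULTIPATH list of
 * output interfaces for a resolved cache entry.
 */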
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent > MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(net, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

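/* rtnetlink route-get handler for multicast destinations. If no cache
 * entry exists yet, a dummy (version 0) header is queued for
 * resolution unless the caller asked not to wait.
 */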
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &net->ipv4.mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &net->ipv4.mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &net->ipv4.mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = net->ipv4.mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &net->ipv4.mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &net->ipv4.mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif


/*
 *	Setup for IP multicast routing
 */
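/* Per-namespace setup: allocate the VIF table and the MFC hash,
 * initialize the unresolved-queue expiry timer and create the
 * /proc/net entries.
 */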
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

	setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)net);

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

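/* Module init: create the MFC slab cache, register the per-namespace
 * operations, the netdevice notifier and (optionally) the PIMv2
 * protocol handler, unwinding in reverse order on failure.
 */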
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}