/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* Big lock, protecting the vif table, the mrt cache and the mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We revert to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   The data path is thus entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
        struct net *net = dev_net(dev);

        dev_close(dev);

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
                        set_fs(oldfs);
                }
        }
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name(net, "tunl0");

        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                int err;
                struct ifreq ifr;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
                } else
                        err = -EOPNOTSUPP;

                dev = NULL;

                if (err == 0 &&
                    (dev = __dev_get_by_name(net, p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get_rtnl(dev);
                        if (in_dev == NULL)
                                goto failure;

                        ipv4_devconf_setall(in_dev);
                        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

                        if (dev_open(dev))
                                goto failure;
                        dev_hold(dev);
                }
        }
        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net *net = dev_net(dev);

        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;
        ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
                          IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
        dev->destructor         = free_netdev;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
        struct net_device *dev;
        struct in_device *in_dev;

        dev = alloc_netdev(0, "pimreg", reg_vif_setup);

        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }
        dev->iflink = 0;

        rcu_read_lock();
        if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
                rcu_read_unlock();
                goto failure;
        }

        ipv4_devconf_setall(in_dev);
        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
        rcu_read_unlock();

        if (dev_open(dev))
                goto failure;

        dev_hold(dev);

        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 *      @notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
                      struct list_head *head)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= net->ipv4.maxvif)
                return -EADDRNOTAVAIL;

        v = &net->ipv4.vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == net->ipv4.mroute_reg_vif_num)
                net->ipv4.mroute_reg_vif_num = -1;
#endif

        if (vifi+1 == net->ipv4.maxvif) {
                int tmp;
                for (tmp = vifi-1; tmp >= 0; tmp--) {
                        if (VIF_EXISTS(net, tmp))
                                break;
                }
                net->ipv4.maxvif = tmp+1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags & (VIFF_TUNNEL|VIFF_REGISTER) && !notify)
                unregister_netdevice_queue(dev, head);

        dev_put(dev);
        return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
        release_net(mfc_net(c));
        kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;
        struct net *net = mfc_net(c);

        atomic_dec(&net->ipv4.cache_resolve_queue_len);

        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = NLMSG_DATA(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        kfree_skb(skb);
        }

        ipmr_cache_free(c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, **cp;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&ipmr_expire_timer, jiffies + HZ/10);
                return;
        }

        if (mfc_unres_queue == NULL)
                goto out;

        now = jiffies;
        expires = 10*HZ;
        cp = &mfc_unres_queue;

        while ((c = *cp) != NULL) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        cp = &c->next;
                        continue;
                }

                *cp = c->next;

                ipmr_destroy_unres(c);
        }

        if (mfc_unres_queue != NULL)
                mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
        int vifi;
        struct net *net = mfc_net(cache);

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
                if (VIF_EXISTS(net, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}

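/* Editor's illustration (not in the original source): the per-vif TTL
 * array doubles as the set of output interfaces. For example, given a
 * request with ttls[0] = 1, ttls[1] = 0, ttls[2] = 64 and everything
 * else zero, vifs 0 and 2 become oifs (a packet is sent there only if
 * its TTL exceeds 1 resp. 64, see ip_mr_forward()), vif 1 stays
 * disabled (recorded as 255), and minvif/maxvif end up as 0 and 3 so
 * the forwarding loop scans exactly vifs 0..2.
 */
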
static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        /* Is vif busy ? */
        if (VIF_EXISTS(net, vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (net->ipv4.mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(net);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(net, vifc);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        ipmr_del_tunnel(dev, vifc);
                        dev_put(dev);
                        return err;
                }
                break;

        case VIFF_USE_IFINDEX:
        case 0:
                if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
                        dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
                        if (dev && dev->ip_ptr == NULL) {
                                dev_put(dev);
                                return -EADDRNOTAVAIL;
                        }
                } else
                        dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
        v->flags = vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags & (VIFF_TUNNEL|VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags & VIFF_REGISTER)
                net->ipv4.mroute_reg_vif_num = vifi;
#endif
        if (vifi+1 > net->ipv4.maxvif)
                net->ipv4.maxvif = vifi+1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

static struct mfc_cache *ipmr_cache_find(struct net *net,
                                         __be32 origin,
                                         __be32 mcastgrp)
{
        int line = MFC_HASH(mcastgrp, origin);
        struct mfc_cache *c;

        for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
                if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
                        break;
        }
        return c;
}

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (c == NULL)
                return NULL;
        c->mfc_un.res.minvif = MAXVIFS;
        mfc_net_set(c, net);
        return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (c == NULL)
                return NULL;
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        mfc_net_set(c, net);
        return c;
}

/*
 *      A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /*
         *      Play the pending entries through our router
         */

        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = (skb_tail_pointer(skb) -
                                                  (u8 *)nlh);
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = NLMSG_DATA(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
                } else
                        ip_mr_forward(skb, c, 0);
        }
}

/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = net->ipv4.mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
        ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));

        /*
         *      Add our header
         */

        igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
        skb->transport_header = skb->network_header;
        }

        if (net->ipv4.mroute_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}

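/* Editor's illustration (not in the original source): a minimal sketch
 * of the mrouted side of the upcall above, assuming the daemon owns the
 * raw IGMP socket that was passed to MRT_INIT. The kernel overlays
 * struct igmpmsg (from <linux/mroute.h>) on the IP header it queues
 * here; userspace tells an upcall apart from a genuine IGMP packet by
 * im_mbz == 0, since that byte sits where a real IP header carries a
 * non-zero protocol value.
 *
 *      char buf[1500];
 *      ssize_t n = recv(mrouter_fd, buf, sizeof(buf), 0);
 *      struct igmpmsg *im = (struct igmpmsg *)buf;
 *
 *      if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0) {
 *              switch (im->im_msgtype) {
 *              case IGMPMSG_NOCACHE:   // resolve (im->im_src, im->im_dst),
 *                                      // then install a route via MRT_ADD_MFC
 *              case IGMPMSG_WRONGVIF:  // PIM assert processing
 *              case IGMPMSG_WHOLEPKT:  // PIM register encapsulation
 *                      break;
 *              }
 *      }
 */
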
/*
 *      Queue a packet for resolution. It obtains a locked cache entry.
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
        int err;
        struct mfc_cache *c;
        const struct iphdr *iph = ip_hdr(skb);

        spin_lock_bh(&mfc_unres_lock);
        for (c = mfc_unres_queue; c; c = c->next) {
                if (net_eq(mfc_net(c), net) &&
                    c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr)
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres(net)) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent   = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&net->ipv4.cache_resolve_queue_len);
                c->next = mfc_unres_queue;
                mfc_unres_queue = c;

                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, **cp;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                        return 0;
                }
        }
        return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
        int line;
        struct mfc_cache *uc, *c, **cp;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
                        break;
        }

        if (c != NULL) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c = ipmr_cache_alloc(net);
        if (c == NULL)
                return -ENOMEM;

        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        c->next = net->ipv4.mfc_cache_array[line];
        net->ipv4.mfc_cache_array[line] = c;
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
             cp = &uc->next) {
                if (net_eq(mfc_net(uc), net) &&
                    uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        *cp = uc->next;
                        atomic_dec(&net->ipv4.cache_resolve_queue_len);
                        break;
                }
        }
        if (mfc_unres_queue == NULL)
                del_timer(&ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (uc) {
                ipmr_cache_resolve(uc, c);
                ipmr_cache_free(uc);
        }
        return 0;
}

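/* Editor's illustration (not in the original source): how a userspace
 * daemon would install the (S,G) entry that ipmr_mfc_add() above
 * creates, typically in response to an IGMPMSG_NOCACHE upcall.
 * mrouter_fd is assumed to be the MRT_INIT'ed raw IGMP socket; the
 * addresses are documentation prefixes, not real ones.
 *
 *      struct mfcctl mc;
 *
 *      memset(&mc, 0, sizeof(mc));
 *      mc.mfcc_origin.s_addr   = inet_addr("192.0.2.1");    // S
 *      mc.mfcc_mcastgrp.s_addr = inet_addr("233.252.0.1");  // G
 *      mc.mfcc_parent          = 0;                         // input vif
 *      mc.mfcc_ttls[1]         = 1;                         // forward on vif 1
 *      setsockopt(mrouter_fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */
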
/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
        int i;
        LIST_HEAD(list);

        /*
         *      Shut down all active vif entries
         */
        for (i = 0; i < net->ipv4.maxvif; i++) {
                if (!(net->ipv4.vif_table[i].flags & VIFF_STATIC))
                        vif_delete(net, i, 0, &list);
        }
        unregister_netdevice_many(&list);

        /*
         *      Wipe the cache
         */
        for (i = 0; i < MFC_LINES; i++) {
                struct mfc_cache *c, **cp;

                cp = &net->ipv4.mfc_cache_array[i];
                while ((c = *cp) != NULL) {
                        if (c->mfc_flags & MFC_STATIC) {
                                cp = &c->next;
                                continue;
                        }
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                }
        }

        if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
                struct mfc_cache *c, **cp;

                spin_lock_bh(&mfc_unres_lock);
                cp = &mfc_unres_queue;
                while ((c = *cp) != NULL) {
                        if (!net_eq(mfc_net(c), net)) {
                                cp = &c->next;
                                continue;
                        }
                        *cp = c->next;

                        ipmr_destroy_unres(c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

static void mrtsock_destruct(struct sock *sk)
{
        struct net *net = sock_net(sk);

        rtnl_lock();
        if (sk == net->ipv4.mroute_sk) {
                IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

                write_lock_bh(&mrt_lock);
                net->ipv4.mroute_sk = NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(net);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;
        struct net *net = sock_net(sk);

        if (optname != MRT_INIT) {
                if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT_INIT:
                if (sk->sk_type != SOCK_RAW ||
                    inet_sk(sk)->inet_num != IPPROTO_IGMP)
                        return -EOPNOTSUPP;
                if (optlen != sizeof(int))
                        return -ENOPROTOOPT;

                rtnl_lock();
                if (net->ipv4.mroute_sk) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }

                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        write_lock_bh(&mrt_lock);
                        net->ipv4.mroute_sk = sk;
                        write_unlock_bh(&mrt_lock);

                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
                if (sk != net->ipv4.mroute_sk)
                        return -EACCES;
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen != sizeof(vif))
                        return -EINVAL;
                if (copy_from_user(&vif, optval, sizeof(vif)))
                        return -EFAULT;
                if (vif.vifc_vifi >= MAXVIFS)
                        return -ENFILE;
                rtnl_lock();
                if (optname == MRT_ADD_VIF) {
                        ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
                } else {
                        ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
                }
                rtnl_unlock();
                return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                if (optlen != sizeof(mfc))
                        return -EINVAL;
                if (copy_from_user(&mfc, optval, sizeof(mfc)))
                        return -EFAULT;
                rtnl_lock();
                if (optname == MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(net, &mfc);
                else
                        ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
                rtnl_unlock();
                return ret;
                /*
                 *      Control PIM assert.
                 */
        case MRT_ASSERT:
        {
                int v;
                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                net->ipv4.mroute_do_assert = (v) ? 1 : 0;
                return 0;
        }
#ifdef CONFIG_IP_PIMSM
        case MRT_PIM:
        {
                int v;

                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                v = (v) ? 1 : 0;

                rtnl_lock();
                ret = 0;
                if (v != net->ipv4.mroute_do_pim) {
                        net->ipv4.mroute_do_pim = v;
                        net->ipv4.mroute_do_assert = v;
                }
                rtnl_unlock();
                return ret;
        }
#endif
        /*
         *      Spurious command, or MRT_VERSION which you cannot
         *      set.
         */
        default:
                return -ENOPROTOOPT;
        }
}

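/* Editor's illustration (not in the original source): the matching
 * userspace side of the setsockopt interface above. Only a process
 * with CAP_NET_ADMIN may do this, and the socket must be a raw IGMP
 * socket; the interface address is a documentation prefix.
 *
 *      int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *      int one = 1;
 *      struct vifctl vc;
 *
 *      setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *      memset(&vc, 0, sizeof(vc));
 *      vc.vifc_vifi            = 0;                          // vif index
 *      vc.vifc_threshold       = 1;                          // minimum TTL
 *      vc.vifc_lcl_addr.s_addr = inet_addr("198.51.100.7");  // local if addr
 *      setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 * Closing fd (or MRT_DONE) tears everything down via mrtsock_destruct().
 */
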
/*
 *      Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
        int olr;
        int val;
        struct net *net = sock_net(sk);

        if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
            optname != MRT_PIM &&
#endif
            optname != MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        if (olr < 0)
                return -EINVAL;
        olr = min_t(unsigned int, olr, sizeof(int));

        if (put_user(olr, optlen))
                return -EFAULT;
        if (optname == MRT_VERSION)
                val = 0x0305;
#ifdef CONFIG_IP_PIMSM
        else if (optname == MRT_PIM)
                val = net->ipv4.mroute_do_pim;
#endif
        else
                val = net->ipv4.mroute_do_assert;
        if (copy_to_user(optval, &val, olr))
                return -EFAULT;
        return 0;
}

/*
 *      The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= net->ipv4.maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
                vif = &net->ipv4.vif_table[vr.vifi];
                if (VIF_EXISTS(net, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                read_lock(&mrt_lock);
                c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}

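/* Editor's illustration (not in the original source): reading the
 * per-vif counters kept above from userspace; here the mrouter socket
 * is simply reused for the ioctl.
 *
 *      struct sioc_vif_req vr;
 *
 *      memset(&vr, 0, sizeof(vr));
 *      vr.vifi = 0;
 *      if (ioctl(mrouter_fd, SIOCGETVIFCNT, &vr) == 0)
 *              printf("vif0: %lu pkts in, %lu pkts out\n",
 *                     vr.icount, vr.ocount);
 */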

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;
        struct net *net = dev_net(dev);
        struct vif_device *v;
        int ct;
        LIST_HEAD(list);

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v = &net->ipv4.vif_table[0];
        for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
                if (v->dev == dev)
                        vif_delete(net, ct, 1, &list);
        }
        unregister_netdevice_many(&list);
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph;
        struct iphdr *old_iph = ip_hdr(skb);

        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        iph->version    =       4;
        iph->tos        =       old_iph->tos;
        iph->ttl        =       old_iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb_dst(skb), NULL);
        ip_send_check(iph);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}

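/* Editor's note (not in the original source): after ip_encap() the
 * frame layout is
 *
 *      | outer iphdr (proto = IPPROTO_IPIP) | original iphdr | payload |
 *
 * with network_header pointing at the outer header and transport_header
 * at the inner one; TTL and TOS are copied from the inner header, and
 * the outer header is 20 bytes (ihl = 5, no options).
 */
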
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options *opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        struct net *net = mfc_net(c);
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        if (vif->flags & VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len + encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow us to send ICMP here, so such packets simply
                   disappear into a black hole.
                 */

                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->u.dst);
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
         * locally not only before forwarding, but also after forwarding on
         * all output interfaces.  Clearly, if the mrouter runs a multicasting
         * program, it should receive packets regardless of the interface the
         * program joined on.  If we did not do this, the program would have
         * to join on all interfaces.  On the other hand, a multihomed host
         * (or a router, but not an mrouter) cannot join on more than one
         * interface, as that would result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}

static int ipmr_find_vif(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        int ct;
        for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
                if (net->ipv4.vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;
        struct net *net = mfc_net(cache);

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (net->ipv4.vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb_rtable(skb)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           This is a very complicated situation...

                           The best workaround until the routing daemons are
                           fixed is not to redistribute a packet if it was
                           sent through the wrong interface.  It means that
                           multicast applications WILL NOT work for (S,G)
                           entries whose default multicast route points to a
                           wrong oif.  In any case, it is not a good idea to
                           use multicasting applications on a router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
                    /* pimsm uses asserts when switching from RPT to SPT,
                       so we cannot check that the packet arrived on an oif.
                       It is bad, but otherwise we would need to move a
                       pretty large chunk of pimd into the kernel. Ough... --ANK
                     */
                    (net->ipv4.mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        net->ipv4.vif_table[vif].pkt_in++;
        net->ipv4.vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend = ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}

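/* Editor's note, a worked example of the forwarding loop above (not in
 * the original source): with minvif = 1, maxvif = 4 and
 * ttls = { 255, 1, 255, 3, ... }, a packet arriving with TTL 5 first
 * matches at vif 3 (5 > 3); when vif 1 also qualifies (5 > 1), a clone
 * goes out on vif 3 and the original skb is finally transmitted on
 * vif 1 (vif 2 stays untouched, since 255 means "never forward").
 * The clone-all-but-last pattern saves one skb copy.
 */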

/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

        /* A packet looped back after forwarding must not be forwarded a
           second time, but it can still be delivered locally.
         */
        if (IPCB(skb)->flags & IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                if (IPCB(skb)->opt.router_alert) {
                        if (ip_call_ra_chain(skb))
                                return 0;
                } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
                        /* IGMPv1 (and broken IGMPv2 implementations, such as
                           Cisco IOS <= 11.2(8)) do not put the router alert
                           option into IGMP packets destined for routable
                           groups. This is very bad, because it means
                           that we can forward NO IGMP messages.
                         */
                        read_lock(&mrt_lock);
                        if (net->ipv4.mroute_sk) {
                                nf_reset(skb);
                                raw_rcv(net->ipv4.mroute_sk, skb);
                                read_unlock(&mrt_lock);
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache == NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(net, vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
        struct net_device *reg_dev = NULL;
        struct iphdr *encap;
        struct net *net = dev_net(skb->dev);

        encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        /*
           Check that:
           a. packet is really destined to a multicast group
           b. packet is not a NULL-REGISTER
           c. packet is not truncated
         */
1527         if (!ipv4_is_multicast(encap->daddr) ||
1528             encap->tot_len == 0 ||
1529             ntohs(encap->tot_len) + pimlen > skb->len)
1530                 return 1;
1531
1532         read_lock(&mrt_lock);
1533         if (net->ipv4.mroute_reg_vif_num >= 0)
1534                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1535         if (reg_dev)
1536                 dev_hold(reg_dev);
1537         read_unlock(&mrt_lock);
1538
1539         if (reg_dev == NULL)
1540                 return 1;
1541
1542         skb->mac_header = skb->network_header;
1543         skb_pull(skb, (u8*)encap - skb->data);
1544         skb_reset_network_header(skb);
1545         skb->dev = reg_dev;
1546         skb->protocol = htons(ETH_P_IP);
1547         skb->ip_summed = 0;
1548         skb->pkt_type = PACKET_HOST;
1549         skb_dst_drop(skb);
1550         reg_dev->stats.rx_bytes += skb->len;
1551         reg_dev->stats.rx_packets++;
1552         nf_reset(skb);
1553         netif_rx(skb);
1554         dev_put(reg_dev);
1555
1556         return 0;
1557 }
1558 #endif
1559
1560 #ifdef CONFIG_IP_PIMSM_V1
1561 /*
1562  * Handle IGMP messages of PIMv1
1563  */
1564
1565 int pim_rcv_v1(struct sk_buff * skb)
1566 {
1567         struct igmphdr *pim;
1568         struct net *net = dev_net(skb->dev);
1569
1570         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1571                 goto drop;
1572
1573         pim = igmp_hdr(skb);
1574
1575         if (!net->ipv4.mroute_do_pim ||
1576             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1577                 goto drop;
1578
1579         if (__pim_rcv(skb, sizeof(*pim))) {
1580 drop:
1581                 kfree_skb(skb);
1582         }
1583         return 0;
1584 }
1585 #endif
1586
1587 #ifdef CONFIG_IP_PIMSM_V2
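/*
 * Note on the checksum test below (an illustrative reading of the code):
 * a register is accepted if either the checksum over the PIM header
 * alone verifies (the ip_compute_csum() call over sizeof(*pim)) or,
 * failing that, the checksum over the entire packet does (the
 * csum_fold(skb_checksum(...)) over skb->len).
 */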
1588 static int pim_rcv(struct sk_buff *skb)
1589 {
1590         struct pimreghdr *pim;
1591
1592         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1593                 goto drop;
1594
1595         pim = (struct pimreghdr *)skb_transport_header(skb);
1596         if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
1597             (pim->flags & PIM_NULL_REGISTER) ||
1598             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1599              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1600                 goto drop;
1601
1602         if (__pim_rcv(skb, sizeof(*pim))) {
1603 drop:
1604                 kfree_skb(skb);
1605         }
1606         return 0;
1607 }
1608 #endif
1609
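/*
 * Explanatory note (illustrative, not part of the original comments):
 * ipmr_fill_mroute() fills in the multicast-specific portion of an
 * rtnetlink route reply -- RTA_IIF carries the ifindex of the incoming
 * vif's device, and RTA_MULTIPATH holds one rtnexthop per forwarding
 * vif with the vif's TTL threshold in rtnh_hops.
 */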
1610 static int
1611 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1612 {
1613         int ct;
1614         struct rtnexthop *nhp;
1615         struct net *net = mfc_net(c);
1616         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1617         u8 *b = skb_tail_pointer(skb);
1618         struct rtattr *mp_head;
1619
1620         if (dev)
1621                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1622
1623         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1624
1625         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1626                 if (c->mfc_un.res.ttls[ct] < 255) {
1627                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1628                                 goto rtattr_failure;
1629                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1630                         nhp->rtnh_flags = 0;
1631                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1632                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1633                         nhp->rtnh_len = sizeof(*nhp);
1634                 }
1635         }
1636         mp_head->rta_type = RTA_MULTIPATH;
1637         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1638         rtm->rtm_type = RTN_MULTICAST;
1639         return 1;
1640
1641 rtattr_failure:
1642         nlmsg_trim(skb, b);
1643         return -EMSGSIZE;
1644 }
1645
1646 int ipmr_get_route(struct net *net,
1647                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1648 {
1649         int err;
1650         struct mfc_cache *cache;
1651         struct rtable *rt = skb_rtable(skb);
1652
1653         read_lock(&mrt_lock);
1654         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1655
1656         if (cache == NULL) {
1657                 struct sk_buff *skb2;
1658                 struct iphdr *iph;
1659                 struct net_device *dev;
1660                 int vif;
1661
1662                 if (nowait) {
1663                         read_unlock(&mrt_lock);
1664                         return -EAGAIN;
1665                 }
1666
1667                 dev = skb->dev;
1668                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1669                         read_unlock(&mrt_lock);
1670                         return -ENODEV;
1671                 }
1672                 skb2 = skb_clone(skb, GFP_ATOMIC);
1673                 if (!skb2) {
1674                         read_unlock(&mrt_lock);
1675                         return -ENOMEM;
1676                 }
1677
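                /*
                 * Explanatory note (illustrative): the clone carries no
                 * usable IP header, so a minimal pseudo header is built
                 * below just to convey the (src, dst) pair to the
                 * unresolved queue.  iph->version is set to 0 -- never
                 * valid in a real packet -- so the cache resolution code
                 * can tell this netlink-request skb apart from genuine
                 * queued packets.
                 */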
1678                 skb_push(skb2, sizeof(struct iphdr));
1679                 skb_reset_network_header(skb2);
1680                 iph = ip_hdr(skb2);
1681                 iph->ihl = sizeof(struct iphdr) >> 2;
1682                 iph->saddr = rt->rt_src;
1683                 iph->daddr = rt->rt_dst;
1684                 iph->version = 0;
1685                 err = ipmr_cache_unresolved(net, vif, skb2);
1686                 read_unlock(&mrt_lock);
1687                 return err;
1688         }
1689
1690         if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1691                 cache->mfc_flags |= MFC_NOTIFY;
1692         err = ipmr_fill_mroute(skb, cache, rtm);
1693         read_unlock(&mrt_lock);
1694         return err;
1695 }
1696
1697 #ifdef CONFIG_PROC_FS
1698 /*
1699  *      The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1700  */
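/*
 * Illustrative sample of /proc/net/ip_mr_vif output (values invented for
 * the example; the columns follow ipmr_vif_seq_show() below):
 *
 *   Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *    0 eth0          123456     100    654321     200 00000 C0A80101 00000000
 */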
1701 struct ipmr_vif_iter {
1702         struct seq_net_private p;
1703         int ct;
1704 };
1705
1706 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1707                                            struct ipmr_vif_iter *iter,
1708                                            loff_t pos)
1709 {
1710         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1711                 if (!VIF_EXISTS(net, iter->ct))
1712                         continue;
1713                 if (pos-- == 0)
1714                         return &net->ipv4.vif_table[iter->ct];
1715         }
1716         return NULL;
1717 }
1718
1719 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1720         __acquires(mrt_lock)
1721 {
1722         struct net *net = seq_file_net(seq);
1723
1724         read_lock(&mrt_lock);
1725         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1726                 : SEQ_START_TOKEN;
1727 }
1728
1729 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1730 {
1731         struct ipmr_vif_iter *iter = seq->private;
1732         struct net *net = seq_file_net(seq);
1733
1734         ++*pos;
1735         if (v == SEQ_START_TOKEN)
1736                 return ipmr_vif_seq_idx(net, iter, 0);
1737
1738         while (++iter->ct < net->ipv4.maxvif) {
1739                 if (!VIF_EXISTS(net, iter->ct))
1740                         continue;
1741                 return &net->ipv4.vif_table[iter->ct];
1742         }
1743         return NULL;
1744 }
1745
1746 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1747         __releases(mrt_lock)
1748 {
1749         read_unlock(&mrt_lock);
1750 }
1751
1752 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1753 {
1754         struct net *net = seq_file_net(seq);
1755
1756         if (v == SEQ_START_TOKEN) {
1757                 seq_puts(seq,
1758                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1759         } else {
1760                 const struct vif_device *vif = v;
1761                 const char *name = vif->dev ? vif->dev->name : "none";
1762
1763                 seq_printf(seq,
1764                            "%2zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1765                            vif - net->ipv4.vif_table,
1766                            name, vif->bytes_in, vif->pkt_in,
1767                            vif->bytes_out, vif->pkt_out,
1768                            vif->flags, vif->local, vif->remote);
1769         }
1770         return 0;
1771 }
1772
1773 static const struct seq_operations ipmr_vif_seq_ops = {
1774         .start = ipmr_vif_seq_start,
1775         .next  = ipmr_vif_seq_next,
1776         .stop  = ipmr_vif_seq_stop,
1777         .show  = ipmr_vif_seq_show,
1778 };
1779
1780 static int ipmr_vif_open(struct inode *inode, struct file *file)
1781 {
1782         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1783                             sizeof(struct ipmr_vif_iter));
1784 }
1785
1786 static const struct file_operations ipmr_vif_fops = {
1787         .owner   = THIS_MODULE,
1788         .open    = ipmr_vif_open,
1789         .read    = seq_read,
1790         .llseek  = seq_lseek,
1791         .release = seq_release_net,
1792 };
1793
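/*
 * Explanatory note (illustrative): the MFC seq walk covers two
 * differently-locked lists in one pass.  The resolved hash table is
 * traversed under read_lock(&mrt_lock); once it is exhausted,
 * ipmr_mfc_seq_next() drops that lock and takes
 * spin_lock_bh(&mfc_unres_lock) to walk the unresolved queue.
 * it->cache records which list (and hence which lock) is current, so
 * that ipmr_mfc_seq_stop() releases the right one.
 */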
1794 struct ipmr_mfc_iter {
1795         struct seq_net_private p;
1796         struct mfc_cache **cache;
1797         int ct;
1798 };
1799
1800
1801 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1802                                           struct ipmr_mfc_iter *it, loff_t pos)
1803 {
1804         struct mfc_cache *mfc;
1805
1806         it->cache = net->ipv4.mfc_cache_array;
1807         read_lock(&mrt_lock);
1808         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1809                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1810                      mfc; mfc = mfc->next)
1811                         if (pos-- == 0)
1812                                 return mfc;
1813         read_unlock(&mrt_lock);
1814
1815         it->cache = &mfc_unres_queue;
1816         spin_lock_bh(&mfc_unres_lock);
1817         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1818                 if (net_eq(mfc_net(mfc), net) &&
1819                     pos-- == 0)
1820                         return mfc;
1821         spin_unlock_bh(&mfc_unres_lock);
1822
1823         it->cache = NULL;
1824         return NULL;
1825 }
1826
1827
1828 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1829 {
1830         struct ipmr_mfc_iter *it = seq->private;
1831         struct net *net = seq_file_net(seq);
1832
1833         it->cache = NULL;
1834         it->ct = 0;
1835         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1836                 : SEQ_START_TOKEN;
1837 }
1838
1839 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1840 {
1841         struct mfc_cache *mfc = v;
1842         struct ipmr_mfc_iter *it = seq->private;
1843         struct net *net = seq_file_net(seq);
1844
1845         ++*pos;
1846
1847         if (v == SEQ_START_TOKEN)
1848                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1849
1850         if (mfc->next)
1851                 return mfc->next;
1852
1853         if (it->cache == &mfc_unres_queue)
1854                 goto end_of_list;
1855
1856         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1857
1858         while (++it->ct < MFC_LINES) {
1859                 mfc = net->ipv4.mfc_cache_array[it->ct];
1860                 if (mfc)
1861                         return mfc;
1862         }
1863
1864         /* exhausted cache_array, show unresolved */
1865         read_unlock(&mrt_lock);
1866         it->cache = &mfc_unres_queue;
1867         it->ct = 0;
1868
1869         spin_lock_bh(&mfc_unres_lock);
1870         mfc = mfc_unres_queue;
1871         while (mfc && !net_eq(mfc_net(mfc), net))
1872                 mfc = mfc->next;
1873         if (mfc)
1874                 return mfc;
1875
1876  end_of_list:
1877         spin_unlock_bh(&mfc_unres_lock);
1878         it->cache = NULL;
1879
1880         return NULL;
1881 }
1882
1883 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1884 {
1885         struct ipmr_mfc_iter *it = seq->private;
1886         struct net *net = seq_file_net(seq);
1887
1888         if (it->cache == &mfc_unres_queue)
1889                 spin_unlock_bh(&mfc_unres_lock);
1890         else if (it->cache == net->ipv4.mfc_cache_array)
1891                 read_unlock(&mrt_lock);
1892 }
1893
1894 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1895 {
1896         int n;
1897         struct net *net = seq_file_net(seq);
1898
1899         if (v == SEQ_START_TOKEN) {
1900                 seq_puts(seq,
1901                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1902         } else {
1903                 const struct mfc_cache *mfc = v;
1904                 const struct ipmr_mfc_iter *it = seq->private;
1905
1906                 seq_printf(seq, "%08lX %08lX %-3hd",
1907                            (unsigned long) mfc->mfc_mcastgrp,
1908                            (unsigned long) mfc->mfc_origin,
1909                            mfc->mfc_parent);
1910
1911                 if (it->cache != &mfc_unres_queue) {
1912                         seq_printf(seq, " %8lu %8lu %8lu",
1913                                    mfc->mfc_un.res.pkt,
1914                                    mfc->mfc_un.res.bytes,
1915                                    mfc->mfc_un.res.wrong_if);
1916                         for (n = mfc->mfc_un.res.minvif;
1917                              n < mfc->mfc_un.res.maxvif; n++) {
1918                                 if (VIF_EXISTS(net, n) &&
1919                                     mfc->mfc_un.res.ttls[n] < 255)
1920                                         seq_printf(seq,
1921                                            " %2d:%-3d",
1922                                            n, mfc->mfc_un.res.ttls[n]);
1923                         }
1924                 } else {
1925                         /* unresolved mfc_caches don't contain
1926                          * pkt, bytes and wrong_if values
1927                          */
1928                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1929                 }
1930                 seq_putc(seq, '\n');
1931         }
1932         return 0;
1933 }
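/*
 * Illustrative sample of /proc/net/ip_mr_cache output (values invented
 * for the example; Group and Origin are hex dumps of the addresses in
 * network byte order, and each Oifs entry is a vif:TTL-threshold pair):
 *
 *   Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *   010000E0 0101A8C0 0          42    63000        0  1:1
 */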
1934
1935 static const struct seq_operations ipmr_mfc_seq_ops = {
1936         .start = ipmr_mfc_seq_start,
1937         .next  = ipmr_mfc_seq_next,
1938         .stop  = ipmr_mfc_seq_stop,
1939         .show  = ipmr_mfc_seq_show,
1940 };
1941
1942 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1943 {
1944         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1945                             sizeof(struct ipmr_mfc_iter));
1946 }
1947
1948 static const struct file_operations ipmr_mfc_fops = {
1949         .owner   = THIS_MODULE,
1950         .open    = ipmr_mfc_open,
1951         .read    = seq_read,
1952         .llseek  = seq_lseek,
1953         .release = seq_release_net,
1954 };
1955 #endif
1956
1957 #ifdef CONFIG_IP_PIMSM_V2
1958 static const struct net_protocol pim_protocol = {
1959         .handler        =       pim_rcv,
1960         .netns_ok       =       1,
1961 };
1962 #endif
1963
1964
1965 /*
1966  *      Setup for IP multicast routing
1967  */
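/*
 * Explanatory note (illustrative): each network namespace gets its own
 * vif table and MFC hash array, allocated here and freed again in
 * ipmr_net_exit().  The error path unwinds in reverse order of setup
 * via the goto labels below.
 */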
1968 static int __net_init ipmr_net_init(struct net *net)
1969 {
1970         int err = 0;
1971
1972         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1973                                       GFP_KERNEL);
1974         if (!net->ipv4.vif_table) {
1975                 err = -ENOMEM;
1976                 goto fail;
1977         }
1978
1979         /* Forwarding cache */
1980         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1981                                             sizeof(struct mfc_cache *),
1982                                             GFP_KERNEL);
1983         if (!net->ipv4.mfc_cache_array) {
1984                 err = -ENOMEM;
1985                 goto fail_mfc_cache;
1986         }
1987
1988 #ifdef CONFIG_IP_PIMSM
1989         net->ipv4.mroute_reg_vif_num = -1;
1990 #endif
1991
1992 #ifdef CONFIG_PROC_FS
1993         err = -ENOMEM;
1994         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1995                 goto proc_vif_fail;
1996         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1997                 goto proc_cache_fail;
1998 #endif
1999         return 0;
2000
2001 #ifdef CONFIG_PROC_FS
2002 proc_cache_fail:
2003         proc_net_remove(net, "ip_mr_vif");
2004 proc_vif_fail:
2005         kfree(net->ipv4.mfc_cache_array);
2006 #endif
2007 fail_mfc_cache:
2008         kfree(net->ipv4.vif_table);
2009 fail:
2010         return err;
2011 }
2012
2013 static void __net_exit ipmr_net_exit(struct net *net)
2014 {
2015 #ifdef CONFIG_PROC_FS
2016         proc_net_remove(net, "ip_mr_cache");
2017         proc_net_remove(net, "ip_mr_vif");
2018 #endif
2019         kfree(net->ipv4.mfc_cache_array);
2020         kfree(net->ipv4.vif_table);
2021 }
2022
2023 static struct pernet_operations ipmr_net_ops = {
2024         .init = ipmr_net_init,
2025         .exit = ipmr_net_exit,
2026 };
2027
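/*
 * Explanatory note (illustrative): initialisation proceeds as slab
 * cache -> pernet subsystem -> expiry timer and netdevice notifier ->
 * PIM protocol handler, and the failure labels below unwind those
 * steps in reverse order.
 */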
2028 int __init ip_mr_init(void)
2029 {
2030         int err;
2031
2032         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2033                                        sizeof(struct mfc_cache),
2034                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2035                                        NULL);
2036         if (!mrt_cachep)
2037                 return -ENOMEM;
2038
2039         err = register_pernet_subsys(&ipmr_net_ops);
2040         if (err)
2041                 goto reg_pernet_fail;
2042
2043         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2044         err = register_netdevice_notifier(&ip_mr_notifier);
2045         if (err)
2046                 goto reg_notif_fail;
2047 #ifdef CONFIG_IP_PIMSM_V2
2048         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2049                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2050                 err = -EAGAIN;
2051                 goto add_proto_fail;
2052         }
2053 #endif
2054         return 0;
2055
2056 #ifdef CONFIG_IP_PIMSM_V2
2057 add_proto_fail:
2058         unregister_netdevice_notifier(&ip_mr_notifier);
2059 #endif
2060 reg_notif_fail:
2061         del_timer(&ipmr_expire_timer);
2062         unregister_pernet_subsys(&ipmr_net_ops);
2063 reg_pernet_fail:
2064         kmem_cache_destroy(mrt_cachep);
2065         return err;
2066 }