]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/ipip.c
[IPIP]: Use proper net in hash-lookup functions.
[net-next-2.6.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
89 /*
90    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
91
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94
95
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is parenthesized so that an expression argument (for
 * example HASH(a | b)) is hashed as a whole instead of having the cast
 * and the shift bind to only part of it.  The mask is derived from
 * HASH_SIZE, which therefore has to stay a power of two.
 *
 * Note: the argument is evaluated twice; do not pass expressions with
 * side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
123
124 static int ipip_net_id;
125 struct ipip_net {
126         struct net_device *fb_tunnel_dev;
127 };
128
129 static int ipip_fb_tunnel_init(struct net_device *dev);
130 static int ipip_tunnel_init(struct net_device *dev);
131 static void ipip_tunnel_setup(struct net_device *dev);
132
133 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
134 static struct ip_tunnel *tunnels_r[HASH_SIZE];
135 static struct ip_tunnel *tunnels_l[HASH_SIZE];
136 static struct ip_tunnel *tunnels_wc[1];
137 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
138
139 static DEFINE_RWLOCK(ipip_lock);
140
141 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
142                 __be32 remote, __be32 local)
143 {
144         unsigned h0 = HASH(remote);
145         unsigned h1 = HASH(local);
146         struct ip_tunnel *t;
147
148         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
149                 if (local == t->parms.iph.saddr &&
150                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151                         return t;
152         }
153         for (t = tunnels_r[h0]; t; t = t->next) {
154                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
155                         return t;
156         }
157         for (t = tunnels_l[h1]; t; t = t->next) {
158                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
159                         return t;
160         }
161         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
162                 return t;
163         return NULL;
164 }
165
166 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
167                 struct ip_tunnel_parm *parms)
168 {
169         __be32 remote = parms->iph.daddr;
170         __be32 local = parms->iph.saddr;
171         unsigned h = 0;
172         int prio = 0;
173
174         if (remote) {
175                 prio |= 2;
176                 h ^= HASH(remote);
177         }
178         if (local) {
179                 prio |= 1;
180                 h ^= HASH(local);
181         }
182         return &tunnels[prio][h];
183 }
184
185 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
186                 struct ip_tunnel *t)
187 {
188         return __ipip_bucket(ipn, &t->parms);
189 }
190
191 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
192 {
193         struct ip_tunnel **tp;
194
195         for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
196                 if (t == *tp) {
197                         write_lock_bh(&ipip_lock);
198                         *tp = t->next;
199                         write_unlock_bh(&ipip_lock);
200                         break;
201                 }
202         }
203 }
204
205 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
206 {
207         struct ip_tunnel **tp = ipip_bucket(ipn, t);
208
209         t->next = *tp;
210         write_lock_bh(&ipip_lock);
211         *tp = t;
212         write_unlock_bh(&ipip_lock);
213 }
214
215 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
216                 struct ip_tunnel_parm *parms, int create)
217 {
218         __be32 remote = parms->iph.daddr;
219         __be32 local = parms->iph.saddr;
220         struct ip_tunnel *t, **tp, *nt;
221         struct net_device *dev;
222         char name[IFNAMSIZ];
223         struct ipip_net *ipn = net_generic(net, ipip_net_id);
224
225         for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
226                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
227                         return t;
228         }
229         if (!create)
230                 return NULL;
231
232         if (parms->name[0])
233                 strlcpy(name, parms->name, IFNAMSIZ);
234         else
235                 sprintf(name, "tunl%%d");
236
237         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
238         if (dev == NULL)
239                 return NULL;
240
241         if (strchr(name, '%')) {
242                 if (dev_alloc_name(dev, name) < 0)
243                         goto failed_free;
244         }
245
246         nt = netdev_priv(dev);
247         dev->init = ipip_tunnel_init;
248         nt->parms = *parms;
249
250         if (register_netdevice(dev) < 0)
251                 goto failed_free;
252
253         dev_hold(dev);
254         ipip_tunnel_link(ipn, nt);
255         return nt;
256
257 failed_free:
258         free_netdev(dev);
259         return NULL;
260 }
261
262 static void ipip_tunnel_uninit(struct net_device *dev)
263 {
264         struct net *net = dev_net(dev);
265         struct ipip_net *ipn = net_generic(net, ipip_net_id);
266
267         if (dev == ipn->fb_tunnel_dev) {
268                 write_lock_bh(&ipip_lock);
269                 tunnels_wc[0] = NULL;
270                 write_unlock_bh(&ipip_lock);
271         } else
272                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
273         dev_put(dev);
274 }
275
276 static int ipip_err(struct sk_buff *skb, u32 info)
277 {
278 #ifndef I_WISH_WORLD_WERE_PERFECT
279
280 /* It is not :-( All the routers (except for Linux) return only
281    8 bytes of packet payload. It means, that precise relaying of
282    ICMP in the real Internet is absolutely infeasible.
283  */
284         struct iphdr *iph = (struct iphdr*)skb->data;
285         const int type = icmp_hdr(skb)->type;
286         const int code = icmp_hdr(skb)->code;
287         struct ip_tunnel *t;
288         int err;
289
290         switch (type) {
291         default:
292         case ICMP_PARAMETERPROB:
293                 return 0;
294
295         case ICMP_DEST_UNREACH:
296                 switch (code) {
297                 case ICMP_SR_FAILED:
298                 case ICMP_PORT_UNREACH:
299                         /* Impossible event. */
300                         return 0;
301                 case ICMP_FRAG_NEEDED:
302                         /* Soft state for pmtu is maintained by IP core. */
303                         return 0;
304                 default:
305                         /* All others are translated to HOST_UNREACH.
306                            rfc2003 contains "deep thoughts" about NET_UNREACH,
307                            I believe they are just ether pollution. --ANK
308                          */
309                         break;
310                 }
311                 break;
312         case ICMP_TIME_EXCEEDED:
313                 if (code != ICMP_EXC_TTL)
314                         return 0;
315                 break;
316         }
317
318         err = -ENOENT;
319
320         read_lock(&ipip_lock);
321         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
322         if (t == NULL || t->parms.iph.daddr == 0)
323                 goto out;
324
325         err = 0;
326         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
327                 goto out;
328
329         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
330                 t->err_count++;
331         else
332                 t->err_count = 1;
333         t->err_time = jiffies;
334 out:
335         read_unlock(&ipip_lock);
336         return err;
337 #else
338         struct iphdr *iph = (struct iphdr*)dp;
339         int hlen = iph->ihl<<2;
340         struct iphdr *eiph;
341         const int type = icmp_hdr(skb)->type;
342         const int code = icmp_hdr(skb)->code;
343         int rel_type = 0;
344         int rel_code = 0;
345         __be32 rel_info = 0;
346         __u32 n = 0;
347         struct sk_buff *skb2;
348         struct flowi fl;
349         struct rtable *rt;
350
351         if (len < hlen + sizeof(struct iphdr))
352                 return 0;
353         eiph = (struct iphdr*)(dp + hlen);
354
355         switch (type) {
356         default:
357                 return 0;
358         case ICMP_PARAMETERPROB:
359                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
360                 if (n < hlen)
361                         return 0;
362
363                 /* So... This guy found something strange INSIDE encapsulated
364                    packet. Well, he is fool, but what can we do ?
365                  */
366                 rel_type = ICMP_PARAMETERPROB;
367                 rel_info = htonl((n - hlen) << 24);
368                 break;
369
370         case ICMP_DEST_UNREACH:
371                 switch (code) {
372                 case ICMP_SR_FAILED:
373                 case ICMP_PORT_UNREACH:
374                         /* Impossible event. */
375                         return 0;
376                 case ICMP_FRAG_NEEDED:
377                         /* And it is the only really necessary thing :-) */
378                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
379                         if (n < hlen+68)
380                                 return 0;
381                         n -= hlen;
382                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
383                         if (n > ntohs(eiph->tot_len))
384                                 return 0;
385                         rel_info = htonl(n);
386                         break;
387                 default:
388                         /* All others are translated to HOST_UNREACH.
389                            rfc2003 contains "deep thoughts" about NET_UNREACH,
390                            I believe, it is just ether pollution. --ANK
391                          */
392                         rel_type = ICMP_DEST_UNREACH;
393                         rel_code = ICMP_HOST_UNREACH;
394                         break;
395                 }
396                 break;
397         case ICMP_TIME_EXCEEDED:
398                 if (code != ICMP_EXC_TTL)
399                         return 0;
400                 break;
401         }
402
403         /* Prepare fake skb to feed it to icmp_send */
404         skb2 = skb_clone(skb, GFP_ATOMIC);
405         if (skb2 == NULL)
406                 return 0;
407         dst_release(skb2->dst);
408         skb2->dst = NULL;
409         skb_pull(skb2, skb->data - (u8*)eiph);
410         skb_reset_network_header(skb2);
411
412         /* Try to guess incoming interface */
413         memset(&fl, 0, sizeof(fl));
414         fl.fl4_daddr = eiph->saddr;
415         fl.fl4_tos = RT_TOS(eiph->tos);
416         fl.proto = IPPROTO_IPIP;
417         if (ip_route_output_key(&init_net, &rt, &key)) {
418                 kfree_skb(skb2);
419                 return 0;
420         }
421         skb2->dev = rt->u.dst.dev;
422
423         /* route "incoming" packet */
424         if (rt->rt_flags&RTCF_LOCAL) {
425                 ip_rt_put(rt);
426                 rt = NULL;
427                 fl.fl4_daddr = eiph->daddr;
428                 fl.fl4_src = eiph->saddr;
429                 fl.fl4_tos = eiph->tos;
430                 if (ip_route_output_key(&init_net, &rt, &fl) ||
431                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
432                         ip_rt_put(rt);
433                         kfree_skb(skb2);
434                         return 0;
435                 }
436         } else {
437                 ip_rt_put(rt);
438                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
439                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
440                         kfree_skb(skb2);
441                         return 0;
442                 }
443         }
444
445         /* change mtu on this route */
446         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
447                 if (n > dst_mtu(skb2->dst)) {
448                         kfree_skb(skb2);
449                         return 0;
450                 }
451                 skb2->dst->ops->update_pmtu(skb2->dst, n);
452         } else if (type == ICMP_TIME_EXCEEDED) {
453                 struct ip_tunnel *t = netdev_priv(skb2->dev);
454                 if (t->parms.iph.ttl) {
455                         rel_type = ICMP_DEST_UNREACH;
456                         rel_code = ICMP_HOST_UNREACH;
457                 }
458         }
459
460         icmp_send(skb2, rel_type, rel_code, rel_info);
461         kfree_skb(skb2);
462         return 0;
463 #endif
464 }
465
466 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
467                                         struct sk_buff *skb)
468 {
469         struct iphdr *inner_iph = ip_hdr(skb);
470
471         if (INET_ECN_is_ce(outer_iph->tos))
472                 IP_ECN_set_ce(inner_iph);
473 }
474
475 static int ipip_rcv(struct sk_buff *skb)
476 {
477         struct ip_tunnel *tunnel;
478         const struct iphdr *iph = ip_hdr(skb);
479
480         read_lock(&ipip_lock);
481         if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
482                                         iph->saddr, iph->daddr)) != NULL) {
483                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
484                         read_unlock(&ipip_lock);
485                         kfree_skb(skb);
486                         return 0;
487                 }
488
489                 secpath_reset(skb);
490
491                 skb->mac_header = skb->network_header;
492                 skb_reset_network_header(skb);
493                 skb->protocol = htons(ETH_P_IP);
494                 skb->pkt_type = PACKET_HOST;
495
496                 tunnel->stat.rx_packets++;
497                 tunnel->stat.rx_bytes += skb->len;
498                 skb->dev = tunnel->dev;
499                 dst_release(skb->dst);
500                 skb->dst = NULL;
501                 nf_reset(skb);
502                 ipip_ecn_decapsulate(iph, skb);
503                 netif_rx(skb);
504                 read_unlock(&ipip_lock);
505                 return 0;
506         }
507         read_unlock(&ipip_lock);
508
509         return -1;
510 }
511
512 /*
513  *      This function assumes it is being called from dev_queue_xmit()
514  *      and that skb is filled properly by that function.
515  */
516
517 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
518 {
519         struct ip_tunnel *tunnel = netdev_priv(dev);
520         struct net_device_stats *stats = &tunnel->stat;
521         struct iphdr  *tiph = &tunnel->parms.iph;
522         u8     tos = tunnel->parms.iph.tos;
523         __be16 df = tiph->frag_off;
524         struct rtable *rt;                      /* Route to the other host */
525         struct net_device *tdev;                        /* Device to other host */
526         struct iphdr  *old_iph = ip_hdr(skb);
527         struct iphdr  *iph;                     /* Our new IP header */
528         unsigned int max_headroom;              /* The extra header space needed */
529         __be32 dst = tiph->daddr;
530         int    mtu;
531
532         if (tunnel->recursion++) {
533                 tunnel->stat.collisions++;
534                 goto tx_error;
535         }
536
537         if (skb->protocol != htons(ETH_P_IP))
538                 goto tx_error;
539
540         if (tos&1)
541                 tos = old_iph->tos;
542
543         if (!dst) {
544                 /* NBMA tunnel */
545                 if ((rt = skb->rtable) == NULL) {
546                         tunnel->stat.tx_fifo_errors++;
547                         goto tx_error;
548                 }
549                 if ((dst = rt->rt_gateway) == 0)
550                         goto tx_error_icmp;
551         }
552
553         {
554                 struct flowi fl = { .oif = tunnel->parms.link,
555                                     .nl_u = { .ip4_u =
556                                               { .daddr = dst,
557                                                 .saddr = tiph->saddr,
558                                                 .tos = RT_TOS(tos) } },
559                                     .proto = IPPROTO_IPIP };
560                 if (ip_route_output_key(&init_net, &rt, &fl)) {
561                         tunnel->stat.tx_carrier_errors++;
562                         goto tx_error_icmp;
563                 }
564         }
565         tdev = rt->u.dst.dev;
566
567         if (tdev == dev) {
568                 ip_rt_put(rt);
569                 tunnel->stat.collisions++;
570                 goto tx_error;
571         }
572
573         if (tiph->frag_off)
574                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
575         else
576                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
577
578         if (mtu < 68) {
579                 tunnel->stat.collisions++;
580                 ip_rt_put(rt);
581                 goto tx_error;
582         }
583         if (skb->dst)
584                 skb->dst->ops->update_pmtu(skb->dst, mtu);
585
586         df |= (old_iph->frag_off&htons(IP_DF));
587
588         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
589                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
590                 ip_rt_put(rt);
591                 goto tx_error;
592         }
593
594         if (tunnel->err_count > 0) {
595                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
596                         tunnel->err_count--;
597                         dst_link_failure(skb);
598                 } else
599                         tunnel->err_count = 0;
600         }
601
602         /*
603          * Okay, now see if we can stuff it in the buffer as-is.
604          */
605         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
606
607         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
608             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
609                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
610                 if (!new_skb) {
611                         ip_rt_put(rt);
612                         stats->tx_dropped++;
613                         dev_kfree_skb(skb);
614                         tunnel->recursion--;
615                         return 0;
616                 }
617                 if (skb->sk)
618                         skb_set_owner_w(new_skb, skb->sk);
619                 dev_kfree_skb(skb);
620                 skb = new_skb;
621                 old_iph = ip_hdr(skb);
622         }
623
624         skb->transport_header = skb->network_header;
625         skb_push(skb, sizeof(struct iphdr));
626         skb_reset_network_header(skb);
627         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
628         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
629                               IPSKB_REROUTED);
630         dst_release(skb->dst);
631         skb->dst = &rt->u.dst;
632
633         /*
634          *      Push down and install the IPIP header.
635          */
636
637         iph                     =       ip_hdr(skb);
638         iph->version            =       4;
639         iph->ihl                =       sizeof(struct iphdr)>>2;
640         iph->frag_off           =       df;
641         iph->protocol           =       IPPROTO_IPIP;
642         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
643         iph->daddr              =       rt->rt_dst;
644         iph->saddr              =       rt->rt_src;
645
646         if ((iph->ttl = tiph->ttl) == 0)
647                 iph->ttl        =       old_iph->ttl;
648
649         nf_reset(skb);
650
651         IPTUNNEL_XMIT();
652         tunnel->recursion--;
653         return 0;
654
655 tx_error_icmp:
656         dst_link_failure(skb);
657 tx_error:
658         stats->tx_errors++;
659         dev_kfree_skb(skb);
660         tunnel->recursion--;
661         return 0;
662 }
663
664 static void ipip_tunnel_bind_dev(struct net_device *dev)
665 {
666         struct net_device *tdev = NULL;
667         struct ip_tunnel *tunnel;
668         struct iphdr *iph;
669
670         tunnel = netdev_priv(dev);
671         iph = &tunnel->parms.iph;
672
673         if (iph->daddr) {
674                 struct flowi fl = { .oif = tunnel->parms.link,
675                                     .nl_u = { .ip4_u =
676                                               { .daddr = iph->daddr,
677                                                 .saddr = iph->saddr,
678                                                 .tos = RT_TOS(iph->tos) } },
679                                     .proto = IPPROTO_IPIP };
680                 struct rtable *rt;
681                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
682                         tdev = rt->u.dst.dev;
683                         ip_rt_put(rt);
684                 }
685                 dev->flags |= IFF_POINTOPOINT;
686         }
687
688         if (!tdev && tunnel->parms.link)
689                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
690
691         if (tdev) {
692                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
693                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
694         }
695         dev->iflink = tunnel->parms.link;
696 }
697
698 static int
699 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
700 {
701         int err = 0;
702         struct ip_tunnel_parm p;
703         struct ip_tunnel *t;
704         struct net *net = dev_net(dev);
705         struct ipip_net *ipn = net_generic(net, ipip_net_id);
706
707         switch (cmd) {
708         case SIOCGETTUNNEL:
709                 t = NULL;
710                 if (dev == ipn->fb_tunnel_dev) {
711                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
712                                 err = -EFAULT;
713                                 break;
714                         }
715                         t = ipip_tunnel_locate(net, &p, 0);
716                 }
717                 if (t == NULL)
718                         t = netdev_priv(dev);
719                 memcpy(&p, &t->parms, sizeof(p));
720                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
721                         err = -EFAULT;
722                 break;
723
724         case SIOCADDTUNNEL:
725         case SIOCCHGTUNNEL:
726                 err = -EPERM;
727                 if (!capable(CAP_NET_ADMIN))
728                         goto done;
729
730                 err = -EFAULT;
731                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
732                         goto done;
733
734                 err = -EINVAL;
735                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
736                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
737                         goto done;
738                 if (p.iph.ttl)
739                         p.iph.frag_off |= htons(IP_DF);
740
741                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
742
743                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
744                         if (t != NULL) {
745                                 if (t->dev != dev) {
746                                         err = -EEXIST;
747                                         break;
748                                 }
749                         } else {
750                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
751                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
752                                         err = -EINVAL;
753                                         break;
754                                 }
755                                 t = netdev_priv(dev);
756                                 ipip_tunnel_unlink(ipn, t);
757                                 t->parms.iph.saddr = p.iph.saddr;
758                                 t->parms.iph.daddr = p.iph.daddr;
759                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
760                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
761                                 ipip_tunnel_link(ipn, t);
762                                 netdev_state_change(dev);
763                         }
764                 }
765
766                 if (t) {
767                         err = 0;
768                         if (cmd == SIOCCHGTUNNEL) {
769                                 t->parms.iph.ttl = p.iph.ttl;
770                                 t->parms.iph.tos = p.iph.tos;
771                                 t->parms.iph.frag_off = p.iph.frag_off;
772                                 if (t->parms.link != p.link) {
773                                         t->parms.link = p.link;
774                                         ipip_tunnel_bind_dev(dev);
775                                         netdev_state_change(dev);
776                                 }
777                         }
778                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
779                                 err = -EFAULT;
780                 } else
781                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
782                 break;
783
784         case SIOCDELTUNNEL:
785                 err = -EPERM;
786                 if (!capable(CAP_NET_ADMIN))
787                         goto done;
788
789                 if (dev == ipn->fb_tunnel_dev) {
790                         err = -EFAULT;
791                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
792                                 goto done;
793                         err = -ENOENT;
794                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
795                                 goto done;
796                         err = -EPERM;
797                         if (t->dev == ipn->fb_tunnel_dev)
798                                 goto done;
799                         dev = t->dev;
800                 }
801                 unregister_netdevice(dev);
802                 err = 0;
803                 break;
804
805         default:
806                 err = -EINVAL;
807         }
808
809 done:
810         return err;
811 }
812
813 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
814 {
815         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
816 }
817
818 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
819 {
820         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
821                 return -EINVAL;
822         dev->mtu = new_mtu;
823         return 0;
824 }
825
826 static void ipip_tunnel_setup(struct net_device *dev)
827 {
828         dev->uninit             = ipip_tunnel_uninit;
829         dev->hard_start_xmit    = ipip_tunnel_xmit;
830         dev->get_stats          = ipip_tunnel_get_stats;
831         dev->do_ioctl           = ipip_tunnel_ioctl;
832         dev->change_mtu         = ipip_tunnel_change_mtu;
833         dev->destructor         = free_netdev;
834
835         dev->type               = ARPHRD_TUNNEL;
836         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
837         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
838         dev->flags              = IFF_NOARP;
839         dev->iflink             = 0;
840         dev->addr_len           = 4;
841 }
842
843 static int ipip_tunnel_init(struct net_device *dev)
844 {
845         struct ip_tunnel *tunnel;
846
847         tunnel = netdev_priv(dev);
848
849         tunnel->dev = dev;
850         strcpy(tunnel->parms.name, dev->name);
851
852         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
853         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
854
855         ipip_tunnel_bind_dev(dev);
856
857         return 0;
858 }
859
860 static int ipip_fb_tunnel_init(struct net_device *dev)
861 {
862         struct ip_tunnel *tunnel = netdev_priv(dev);
863         struct iphdr *iph = &tunnel->parms.iph;
864
865         tunnel->dev = dev;
866         strcpy(tunnel->parms.name, dev->name);
867
868         iph->version            = 4;
869         iph->protocol           = IPPROTO_IPIP;
870         iph->ihl                = 5;
871
872         dev_hold(dev);
873         tunnels_wc[0]           = tunnel;
874         return 0;
875 }
876
877 static struct xfrm_tunnel ipip_handler = {
878         .handler        =       ipip_rcv,
879         .err_handler    =       ipip_err,
880         .priority       =       1,
881 };
882
883 static char banner[] __initdata =
884         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
885
886 static int ipip_init_net(struct net *net)
887 {
888         int err;
889         struct ipip_net *ipn;
890
891         err = -ENOMEM;
892         ipn = kmalloc(sizeof(struct ipip_net), GFP_KERNEL);
893         if (ipn == NULL)
894                 goto err_alloc;
895
896         err = net_assign_generic(net, ipip_net_id, ipn);
897         if (err < 0)
898                 goto err_assign;
899
900         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
901                                            "tunl0",
902                                            ipip_tunnel_setup);
903         if (!ipn->fb_tunnel_dev) {
904                 err = -ENOMEM;
905                 goto err_alloc_dev;
906         }
907
908         ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
909         dev_net_set(ipn->fb_tunnel_dev, net);
910
911         if ((err = register_netdev(ipn->fb_tunnel_dev)))
912                 goto err_reg_dev;
913
914         return 0;
915
916 err_reg_dev:
917         free_netdev(ipn->fb_tunnel_dev);
918 err_alloc_dev:
919         /* nothing */
920 err_assign:
921         kfree(ipn);
922 err_alloc:
923         return err;
924 }
925
926 static void ipip_exit_net(struct net *net)
927 {
928         struct ipip_net *ipn;
929
930         ipn = net_generic(net, ipip_net_id);
931         rtnl_lock();
932         unregister_netdevice(ipn->fb_tunnel_dev);
933         rtnl_unlock();
934         kfree(ipn);
935 }
936
937 static struct pernet_operations ipip_net_ops = {
938         .init = ipip_init_net,
939         .exit = ipip_exit_net,
940 };
941
942 static int __init ipip_init(void)
943 {
944         int err;
945
946         printk(banner);
947
948         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
949                 printk(KERN_INFO "ipip init: can't register tunnel\n");
950                 return -EAGAIN;
951         }
952
953         err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
954         if (err)
955                 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
956
957         return err;
958 }
959
960 static void __exit ipip_destroy_tunnels(void)
961 {
962         int prio;
963
964         for (prio = 1; prio < 4; prio++) {
965                 int h;
966                 for (h = 0; h < HASH_SIZE; h++) {
967                         struct ip_tunnel *t;
968                         while ((t = tunnels[prio][h]) != NULL)
969                                 unregister_netdevice(t->dev);
970                 }
971         }
972 }
973
974 static void __exit ipip_fini(void)
975 {
976         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
977                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
978
979         rtnl_lock();
980         ipip_destroy_tunnels();
981         rtnl_unlock();
982
983         unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
984 }
985
986 module_init(ipip_init);
987 module_exit(ipip_fini);
988 MODULE_LICENSE("GPL");