]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipip.c
[IPIP]: Make the fallback tunnel device per-net.
[net-next-2.6.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4
LT
3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29/* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 38
1da177e4
LT
39 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
e905a9ed 45
1da177e4
LT
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 55
1da177e4
LT
56*/
57
58/* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
e905a9ed 78 find out how much more space you can allocate by calling
1da177e4
LT
79 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
89/*
90 This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
91
92 For comments look at net/ipv4/ip_gre.c --ANK
93 */
94
e905a9ed 95
4fc268d2 96#include <linux/capability.h>
1da177e4
LT
97#include <linux/module.h>
98#include <linux/types.h>
1da177e4
LT
99#include <linux/kernel.h>
100#include <asm/uaccess.h>
101#include <linux/skbuff.h>
102#include <linux/netdevice.h>
103#include <linux/in.h>
104#include <linux/tcp.h>
105#include <linux/udp.h>
106#include <linux/if_arp.h>
107#include <linux/mroute.h>
108#include <linux/init.h>
109#include <linux/netfilter_ipv4.h>
46f25dff 110#include <linux/if_ether.h>
1da177e4
LT
111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
1da177e4
LT
115#include <net/ipip.h>
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
10dc4c7b
PE
118#include <net/net_namespace.h>
119#include <net/netns/generic.h>
1da177e4
LT
120
/* Number of buckets in each address-keyed tunnel table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range 0..15 by XOR-ing
 * the value with itself shifted right by four bits and keeping the low
 * nibble.
 *
 * The argument is fully parenthesized so that an expression such as
 * HASH(a | b) hashes the whole expression instead of being torn apart by
 * operator precedence.  The argument is still evaluated twice, so it must
 * not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
1da177e4 123
10dc4c7b
PE
/* Id used with net_generic() to look up this module's per-namespace data. */
static int ipip_net_id;

/* IPIP state kept separately for every network namespace. */
struct ipip_net {
	/* fallback tunnel device of the namespace (allocated as "tunl0") */
	struct net_device *fb_tunnel_dev;
};
128
1da177e4
LT
129static int ipip_fb_tunnel_init(struct net_device *dev);
130static int ipip_tunnel_init(struct net_device *dev);
131static void ipip_tunnel_setup(struct net_device *dev);
132
1da177e4
LT
133static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
134static struct ip_tunnel *tunnels_r[HASH_SIZE];
135static struct ip_tunnel *tunnels_l[HASH_SIZE];
136static struct ip_tunnel *tunnels_wc[1];
137static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
138
139static DEFINE_RWLOCK(ipip_lock);
140
d5a0a1e3 141static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
1da177e4
LT
142{
143 unsigned h0 = HASH(remote);
144 unsigned h1 = HASH(local);
145 struct ip_tunnel *t;
146
147 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
148 if (local == t->parms.iph.saddr &&
149 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 return t;
151 }
152 for (t = tunnels_r[h0]; t; t = t->next) {
153 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154 return t;
155 }
156 for (t = tunnels_l[h1]; t; t = t->next) {
157 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158 return t;
159 }
160 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
161 return t;
162 return NULL;
163}
164
87d1a164 165static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
1da177e4 166{
87d1a164
YH
167 __be32 remote = parms->iph.daddr;
168 __be32 local = parms->iph.saddr;
1da177e4
LT
169 unsigned h = 0;
170 int prio = 0;
171
172 if (remote) {
173 prio |= 2;
174 h ^= HASH(remote);
175 }
176 if (local) {
177 prio |= 1;
178 h ^= HASH(local);
179 }
180 return &tunnels[prio][h];
181}
182
87d1a164
YH
183static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
184{
185 return __ipip_bucket(&t->parms);
186}
1da177e4
LT
187
188static void ipip_tunnel_unlink(struct ip_tunnel *t)
189{
190 struct ip_tunnel **tp;
191
192 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
193 if (t == *tp) {
194 write_lock_bh(&ipip_lock);
195 *tp = t->next;
196 write_unlock_bh(&ipip_lock);
197 break;
198 }
199 }
200}
201
202static void ipip_tunnel_link(struct ip_tunnel *t)
203{
204 struct ip_tunnel **tp = ipip_bucket(t);
205
206 t->next = *tp;
207 write_lock_bh(&ipip_lock);
208 *tp = t;
209 write_unlock_bh(&ipip_lock);
210}
211
212static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
213{
d5a0a1e3
AV
214 __be32 remote = parms->iph.daddr;
215 __be32 local = parms->iph.saddr;
1da177e4
LT
216 struct ip_tunnel *t, **tp, *nt;
217 struct net_device *dev;
1da177e4
LT
218 char name[IFNAMSIZ];
219
87d1a164 220 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
221 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
222 return t;
223 }
224 if (!create)
225 return NULL;
226
227 if (parms->name[0])
228 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
229 else
230 sprintf(name, "tunl%%d");
1da177e4
LT
231
232 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
233 if (dev == NULL)
234 return NULL;
235
b37d428b
PE
236 if (strchr(name, '%')) {
237 if (dev_alloc_name(dev, name) < 0)
238 goto failed_free;
239 }
240
2941a486 241 nt = netdev_priv(dev);
1da177e4
LT
242 dev->init = ipip_tunnel_init;
243 nt->parms = *parms;
244
b37d428b
PE
245 if (register_netdevice(dev) < 0)
246 goto failed_free;
1da177e4
LT
247
248 dev_hold(dev);
249 ipip_tunnel_link(nt);
1da177e4
LT
250 return nt;
251
b37d428b
PE
252failed_free:
253 free_netdev(dev);
1da177e4
LT
254 return NULL;
255}
256
257static void ipip_tunnel_uninit(struct net_device *dev)
258{
b9855c54
PE
259 struct net *net = dev_net(dev);
260 struct ipip_net *ipn = net_generic(net, ipip_net_id);
261
262 if (dev == ipn->fb_tunnel_dev) {
1da177e4
LT
263 write_lock_bh(&ipip_lock);
264 tunnels_wc[0] = NULL;
265 write_unlock_bh(&ipip_lock);
266 } else
2941a486 267 ipip_tunnel_unlink(netdev_priv(dev));
1da177e4
LT
268 dev_put(dev);
269}
270
d2acc347 271static int ipip_err(struct sk_buff *skb, u32 info)
1da177e4
LT
272{
273#ifndef I_WISH_WORLD_WERE_PERFECT
274
275/* It is not :-( All the routers (except for Linux) return only
276 8 bytes of packet payload. It means, that precise relaying of
277 ICMP in the real Internet is absolutely infeasible.
278 */
279 struct iphdr *iph = (struct iphdr*)skb->data;
88c7664f
ACM
280 const int type = icmp_hdr(skb)->type;
281 const int code = icmp_hdr(skb)->code;
1da177e4 282 struct ip_tunnel *t;
d2acc347 283 int err;
1da177e4
LT
284
285 switch (type) {
286 default:
287 case ICMP_PARAMETERPROB:
d2acc347 288 return 0;
1da177e4
LT
289
290 case ICMP_DEST_UNREACH:
291 switch (code) {
292 case ICMP_SR_FAILED:
293 case ICMP_PORT_UNREACH:
294 /* Impossible event. */
d2acc347 295 return 0;
1da177e4
LT
296 case ICMP_FRAG_NEEDED:
297 /* Soft state for pmtu is maintained by IP core. */
d2acc347 298 return 0;
1da177e4
LT
299 default:
300 /* All others are translated to HOST_UNREACH.
301 rfc2003 contains "deep thoughts" about NET_UNREACH,
302 I believe they are just ether pollution. --ANK
303 */
304 break;
305 }
306 break;
307 case ICMP_TIME_EXCEEDED:
308 if (code != ICMP_EXC_TTL)
d2acc347 309 return 0;
1da177e4
LT
310 break;
311 }
312
d2acc347
HX
313 err = -ENOENT;
314
1da177e4
LT
315 read_lock(&ipip_lock);
316 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
317 if (t == NULL || t->parms.iph.daddr == 0)
318 goto out;
d2acc347
HX
319
320 err = 0;
1da177e4
LT
321 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
322 goto out;
323
324 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
325 t->err_count++;
326 else
327 t->err_count = 1;
328 t->err_time = jiffies;
329out:
330 read_unlock(&ipip_lock);
d2acc347 331 return err;
1da177e4
LT
332#else
333 struct iphdr *iph = (struct iphdr*)dp;
334 int hlen = iph->ihl<<2;
335 struct iphdr *eiph;
88c7664f
ACM
336 const int type = icmp_hdr(skb)->type;
337 const int code = icmp_hdr(skb)->code;
1da177e4
LT
338 int rel_type = 0;
339 int rel_code = 0;
c55e2f49
AV
340 __be32 rel_info = 0;
341 __u32 n = 0;
1da177e4
LT
342 struct sk_buff *skb2;
343 struct flowi fl;
344 struct rtable *rt;
345
346 if (len < hlen + sizeof(struct iphdr))
d2acc347 347 return 0;
1da177e4
LT
348 eiph = (struct iphdr*)(dp + hlen);
349
350 switch (type) {
351 default:
d2acc347 352 return 0;
1da177e4 353 case ICMP_PARAMETERPROB:
88c7664f 354 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
c55e2f49 355 if (n < hlen)
d2acc347 356 return 0;
1da177e4
LT
357
358 /* So... This guy found something strange INSIDE encapsulated
359 packet. Well, he is fool, but what can we do ?
360 */
361 rel_type = ICMP_PARAMETERPROB;
c55e2f49 362 rel_info = htonl((n - hlen) << 24);
1da177e4
LT
363 break;
364
365 case ICMP_DEST_UNREACH:
366 switch (code) {
367 case ICMP_SR_FAILED:
368 case ICMP_PORT_UNREACH:
369 /* Impossible event. */
d2acc347 370 return 0;
1da177e4
LT
371 case ICMP_FRAG_NEEDED:
372 /* And it is the only really necessary thing :-) */
88c7664f 373 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
c55e2f49 374 if (n < hlen+68)
d2acc347 375 return 0;
c55e2f49 376 n -= hlen;
1da177e4 377 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
c55e2f49 378 if (n > ntohs(eiph->tot_len))
d2acc347 379 return 0;
c55e2f49 380 rel_info = htonl(n);
1da177e4
LT
381 break;
382 default:
383 /* All others are translated to HOST_UNREACH.
384 rfc2003 contains "deep thoughts" about NET_UNREACH,
385 I believe, it is just ether pollution. --ANK
386 */
387 rel_type = ICMP_DEST_UNREACH;
388 rel_code = ICMP_HOST_UNREACH;
389 break;
390 }
391 break;
392 case ICMP_TIME_EXCEEDED:
393 if (code != ICMP_EXC_TTL)
d2acc347 394 return 0;
1da177e4
LT
395 break;
396 }
397
398 /* Prepare fake skb to feed it to icmp_send */
399 skb2 = skb_clone(skb, GFP_ATOMIC);
400 if (skb2 == NULL)
d2acc347 401 return 0;
1da177e4
LT
402 dst_release(skb2->dst);
403 skb2->dst = NULL;
404 skb_pull(skb2, skb->data - (u8*)eiph);
c1d2bbe1 405 skb_reset_network_header(skb2);
1da177e4
LT
406
407 /* Try to guess incoming interface */
408 memset(&fl, 0, sizeof(fl));
409 fl.fl4_daddr = eiph->saddr;
410 fl.fl4_tos = RT_TOS(eiph->tos);
411 fl.proto = IPPROTO_IPIP;
f206351a 412 if (ip_route_output_key(&init_net, &rt, &key)) {
1da177e4 413 kfree_skb(skb2);
d2acc347 414 return 0;
1da177e4
LT
415 }
416 skb2->dev = rt->u.dst.dev;
417
418 /* route "incoming" packet */
419 if (rt->rt_flags&RTCF_LOCAL) {
420 ip_rt_put(rt);
421 rt = NULL;
422 fl.fl4_daddr = eiph->daddr;
423 fl.fl4_src = eiph->saddr;
424 fl.fl4_tos = eiph->tos;
f206351a 425 if (ip_route_output_key(&init_net, &rt, &fl) ||
1da177e4
LT
426 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
427 ip_rt_put(rt);
428 kfree_skb(skb2);
d2acc347 429 return 0;
1da177e4
LT
430 }
431 } else {
432 ip_rt_put(rt);
433 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
434 skb2->dst->dev->type != ARPHRD_TUNNEL) {
435 kfree_skb(skb2);
d2acc347 436 return 0;
1da177e4
LT
437 }
438 }
439
440 /* change mtu on this route */
441 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
c55e2f49 442 if (n > dst_mtu(skb2->dst)) {
1da177e4 443 kfree_skb(skb2);
d2acc347 444 return 0;
1da177e4 445 }
c55e2f49 446 skb2->dst->ops->update_pmtu(skb2->dst, n);
1da177e4 447 } else if (type == ICMP_TIME_EXCEEDED) {
2941a486 448 struct ip_tunnel *t = netdev_priv(skb2->dev);
1da177e4
LT
449 if (t->parms.iph.ttl) {
450 rel_type = ICMP_DEST_UNREACH;
451 rel_code = ICMP_HOST_UNREACH;
452 }
453 }
454
455 icmp_send(skb2, rel_type, rel_code, rel_info);
456 kfree_skb(skb2);
d2acc347 457 return 0;
1da177e4
LT
458#endif
459}
460
eddc9ec5
ACM
461static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
462 struct sk_buff *skb)
1da177e4 463{
eddc9ec5 464 struct iphdr *inner_iph = ip_hdr(skb);
1da177e4
LT
465
466 if (INET_ECN_is_ce(outer_iph->tos))
467 IP_ECN_set_ce(inner_iph);
468}
469
470static int ipip_rcv(struct sk_buff *skb)
471{
1da177e4 472 struct ip_tunnel *tunnel;
eddc9ec5 473 const struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
474
475 read_lock(&ipip_lock);
476 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
477 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
478 read_unlock(&ipip_lock);
479 kfree_skb(skb);
480 return 0;
481 }
482
483 secpath_reset(skb);
484
b0e380b1 485 skb->mac_header = skb->network_header;
c1d2bbe1 486 skb_reset_network_header(skb);
1da177e4
LT
487 skb->protocol = htons(ETH_P_IP);
488 skb->pkt_type = PACKET_HOST;
489
490 tunnel->stat.rx_packets++;
491 tunnel->stat.rx_bytes += skb->len;
492 skb->dev = tunnel->dev;
493 dst_release(skb->dst);
494 skb->dst = NULL;
495 nf_reset(skb);
496 ipip_ecn_decapsulate(iph, skb);
497 netif_rx(skb);
498 read_unlock(&ipip_lock);
499 return 0;
500 }
501 read_unlock(&ipip_lock);
502
1da177e4
LT
503 return -1;
504}
505
506/*
507 * This function assumes it is being called from dev_queue_xmit()
508 * and that skb is filled properly by that function.
509 */
510
/*
 * hard_start_xmit handler of a tunnel device: prepend an outer IPv4 header
 * with protocol IPPROTO_IPIP to the packet in skb and send it along the
 * route to the tunnel's remote endpoint.
 *
 * Always returns 0.  On every failure path the skb is freed and a counter
 * in the tunnel statistics is bumped (tx_dropped when the headroom
 * reallocation fails, tx_errors otherwise, plus a more specific counter
 * where one is set below).  tunnel->recursion is incremented on entry and
 * decremented on every exit path; finding it non-zero on entry is counted
 * as a collision and the packet is dropped.
 */
511static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
512{
2941a486 513 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
514 struct net_device_stats *stats = &tunnel->stat;
515 struct iphdr *tiph = &tunnel->parms.iph;
516 u8 tos = tunnel->parms.iph.tos;
d5a0a1e3 517 __be16 df = tiph->frag_off;
1da177e4
LT
518 struct rtable *rt; /* Route to the other host */
519 struct net_device *tdev; /* Device to other host */
eddc9ec5 520 struct iphdr *old_iph = ip_hdr(skb);
1da177e4 521 struct iphdr *iph; /* Our new IP header */
c2636b4d 522 unsigned int max_headroom; /* The extra header space needed */
d5a0a1e3 523 __be32 dst = tiph->daddr;
1da177e4
LT
524 int mtu;
525
526 if (tunnel->recursion++) {
527 tunnel->stat.collisions++;
528 goto tx_error;
529 }
530
/* only IPv4 payloads are encapsulated */
531 if (skb->protocol != htons(ETH_P_IP))
532 goto tx_error;
533
/* low bit of the configured TOS set: use the TOS of the inner header */
534 if (tos&1)
535 tos = old_iph->tos;
536
/* no remote address configured: use the gateway of the route attached to the skb */
537 if (!dst) {
538 /* NBMA tunnel */
ee6b9673 539 if ((rt = skb->rtable) == NULL) {
1da177e4
LT
540 tunnel->stat.tx_fifo_errors++;
541 goto tx_error;
542 }
543 if ((dst = rt->rt_gateway) == 0)
544 goto tx_error_icmp;
545 }
546
547 {
548 struct flowi fl = { .oif = tunnel->parms.link,
549 .nl_u = { .ip4_u =
550 { .daddr = dst,
551 .saddr = tiph->saddr,
552 .tos = RT_TOS(tos) } },
553 .proto = IPPROTO_IPIP };
f206351a 554 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
555 tunnel->stat.tx_carrier_errors++;
556 goto tx_error_icmp;
557 }
558 }
559 tdev = rt->u.dst.dev;
560
/* the route leads back to this very tunnel device: count a collision and drop */
561 if (tdev == dev) {
562 ip_rt_put(rt);
563 tunnel->stat.collisions++;
564 goto tx_error;
565 }
566
/* with a configured frag_off the MTU is the route MTU minus the outer header,
   otherwise the MTU of the packet's own dst, or dev->mtu if it has none */
567 if (tiph->frag_off)
568 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
569 else
570 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
571
572 if (mtu < 68) {
573 tunnel->stat.collisions++;
574 ip_rt_put(rt);
575 goto tx_error;
576 }
577 if (skb->dst)
578 skb->dst->ops->update_pmtu(skb->dst, mtu);
579
/* the outer header inherits DF from the inner header */
580 df |= (old_iph->frag_off&htons(IP_DF));
581
/* inner packet has DF set and does not fit: report FRAG_NEEDED to its sender */
582 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
583 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
584 ip_rt_put(rt);
585 goto tx_error;
586 }
587
/* err_count/err_time are maintained by ipip_err(); each recent error is
   turned into one dst_link_failure() call, stale counts are discarded */
588 if (tunnel->err_count > 0) {
589 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
590 tunnel->err_count--;
591 dst_link_failure(skb);
592 } else
593 tunnel->err_count = 0;
594 }
595
596 /*
597 * Okay, now see if we can stuff it in the buffer as-is.
598 */
599 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
600
cfbba49d
PM
601 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
602 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
603 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
604 if (!new_skb) {
605 ip_rt_put(rt);
e905a9ed 606 stats->tx_dropped++;
1da177e4
LT
607 dev_kfree_skb(skb);
608 tunnel->recursion--;
609 return 0;
610 }
611 if (skb->sk)
612 skb_set_owner_w(new_skb, skb->sk);
613 dev_kfree_skb(skb);
614 skb = new_skb;
/* the inner header pointer must be re-read from the new buffer */
eddc9ec5 615 old_iph = ip_hdr(skb);
1da177e4
LT
616 }
617
/* the inner IP header becomes the transport header of the outer packet */
b0e380b1 618 skb->transport_header = skb->network_header;
e2d1bca7
ACM
619 skb_push(skb, sizeof(struct iphdr));
620 skb_reset_network_header(skb);
1da177e4 621 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
622 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
623 IPSKB_REROUTED);
1da177e4
LT
624 dst_release(skb->dst);
625 skb->dst = &rt->u.dst;
626
627 /*
628 * Push down and install the IPIP header.
629 */
630
eddc9ec5 631 iph = ip_hdr(skb);
1da177e4
LT
632 iph->version = 4;
633 iph->ihl = sizeof(struct iphdr)>>2;
634 iph->frag_off = df;
635 iph->protocol = IPPROTO_IPIP;
636 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
637 iph->daddr = rt->rt_dst;
638 iph->saddr = rt->rt_src;
639
/* no TTL configured on the tunnel: copy the TTL of the inner header */
640 if ((iph->ttl = tiph->ttl) == 0)
641 iph->ttl = old_iph->ttl;
642
643 nf_reset(skb);
644
/* NOTE(review): IPTUNNEL_XMIT() takes no arguments and is defined outside
   this file; it presumably uses locals of this function by name - confirm
   before renaming any of them */
645 IPTUNNEL_XMIT();
646 tunnel->recursion--;
647 return 0;
648
649tx_error_icmp:
650 dst_link_failure(skb);
651tx_error:
652 stats->tx_errors++;
653 dev_kfree_skb(skb);
654 tunnel->recursion--;
655 return 0;
656}
657
5533995b
MS
658static void ipip_tunnel_bind_dev(struct net_device *dev)
659{
660 struct net_device *tdev = NULL;
661 struct ip_tunnel *tunnel;
662 struct iphdr *iph;
663
664 tunnel = netdev_priv(dev);
665 iph = &tunnel->parms.iph;
666
667 if (iph->daddr) {
668 struct flowi fl = { .oif = tunnel->parms.link,
669 .nl_u = { .ip4_u =
670 { .daddr = iph->daddr,
671 .saddr = iph->saddr,
672 .tos = RT_TOS(iph->tos) } },
673 .proto = IPPROTO_IPIP };
674 struct rtable *rt;
f206351a 675 if (!ip_route_output_key(&init_net, &rt, &fl)) {
5533995b
MS
676 tdev = rt->u.dst.dev;
677 ip_rt_put(rt);
678 }
679 dev->flags |= IFF_POINTOPOINT;
680 }
681
682 if (!tdev && tunnel->parms.link)
683 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
684
685 if (tdev) {
686 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
687 dev->mtu = tdev->mtu - sizeof(struct iphdr);
688 }
689 dev->iflink = tunnel->parms.link;
690}
691
1da177e4
LT
692static int
693ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
694{
695 int err = 0;
696 struct ip_tunnel_parm p;
697 struct ip_tunnel *t;
b9855c54
PE
698 struct net *net = dev_net(dev);
699 struct ipip_net *ipn = net_generic(net, ipip_net_id);
1da177e4
LT
700
701 switch (cmd) {
702 case SIOCGETTUNNEL:
703 t = NULL;
b9855c54 704 if (dev == ipn->fb_tunnel_dev) {
1da177e4
LT
705 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
706 err = -EFAULT;
707 break;
708 }
709 t = ipip_tunnel_locate(&p, 0);
710 }
711 if (t == NULL)
2941a486 712 t = netdev_priv(dev);
1da177e4
LT
713 memcpy(&p, &t->parms, sizeof(p));
714 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
715 err = -EFAULT;
716 break;
717
718 case SIOCADDTUNNEL:
719 case SIOCCHGTUNNEL:
720 err = -EPERM;
721 if (!capable(CAP_NET_ADMIN))
722 goto done;
723
724 err = -EFAULT;
725 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
726 goto done;
727
728 err = -EINVAL;
729 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
730 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
731 goto done;
732 if (p.iph.ttl)
733 p.iph.frag_off |= htons(IP_DF);
734
735 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
736
b9855c54 737 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
738 if (t != NULL) {
739 if (t->dev != dev) {
740 err = -EEXIST;
741 break;
742 }
743 } else {
744 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
745 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
746 err = -EINVAL;
747 break;
748 }
2941a486 749 t = netdev_priv(dev);
1da177e4
LT
750 ipip_tunnel_unlink(t);
751 t->parms.iph.saddr = p.iph.saddr;
752 t->parms.iph.daddr = p.iph.daddr;
753 memcpy(dev->dev_addr, &p.iph.saddr, 4);
754 memcpy(dev->broadcast, &p.iph.daddr, 4);
755 ipip_tunnel_link(t);
756 netdev_state_change(dev);
757 }
758 }
759
760 if (t) {
761 err = 0;
762 if (cmd == SIOCCHGTUNNEL) {
763 t->parms.iph.ttl = p.iph.ttl;
764 t->parms.iph.tos = p.iph.tos;
765 t->parms.iph.frag_off = p.iph.frag_off;
5533995b
MS
766 if (t->parms.link != p.link) {
767 t->parms.link = p.link;
768 ipip_tunnel_bind_dev(dev);
769 netdev_state_change(dev);
770 }
1da177e4
LT
771 }
772 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
773 err = -EFAULT;
774 } else
775 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
776 break;
777
778 case SIOCDELTUNNEL:
779 err = -EPERM;
780 if (!capable(CAP_NET_ADMIN))
781 goto done;
782
b9855c54 783 if (dev == ipn->fb_tunnel_dev) {
1da177e4
LT
784 err = -EFAULT;
785 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
786 goto done;
787 err = -ENOENT;
788 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
789 goto done;
790 err = -EPERM;
b9855c54 791 if (t->dev == ipn->fb_tunnel_dev)
1da177e4
LT
792 goto done;
793 dev = t->dev;
794 }
22f8cde5
SH
795 unregister_netdevice(dev);
796 err = 0;
1da177e4
LT
797 break;
798
799 default:
800 err = -EINVAL;
801 }
802
803done:
804 return err;
805}
806
807static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
808{
2941a486 809 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1da177e4
LT
810}
811
812static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
813{
814 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
815 return -EINVAL;
816 dev->mtu = new_mtu;
817 return 0;
818}
819
820static void ipip_tunnel_setup(struct net_device *dev)
821{
1da177e4
LT
822 dev->uninit = ipip_tunnel_uninit;
823 dev->hard_start_xmit = ipip_tunnel_xmit;
824 dev->get_stats = ipip_tunnel_get_stats;
825 dev->do_ioctl = ipip_tunnel_ioctl;
826 dev->change_mtu = ipip_tunnel_change_mtu;
827 dev->destructor = free_netdev;
828
829 dev->type = ARPHRD_TUNNEL;
830 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
46f25dff 831 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
1da177e4
LT
832 dev->flags = IFF_NOARP;
833 dev->iflink = 0;
834 dev->addr_len = 4;
835}
836
837static int ipip_tunnel_init(struct net_device *dev)
838{
1da177e4 839 struct ip_tunnel *tunnel;
1da177e4 840
2941a486 841 tunnel = netdev_priv(dev);
1da177e4
LT
842
843 tunnel->dev = dev;
844 strcpy(tunnel->parms.name, dev->name);
845
846 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
847 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
848
5533995b 849 ipip_tunnel_bind_dev(dev);
1da177e4
LT
850
851 return 0;
852}
853
b9855c54 854static int ipip_fb_tunnel_init(struct net_device *dev)
1da177e4 855{
2941a486 856 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
857 struct iphdr *iph = &tunnel->parms.iph;
858
859 tunnel->dev = dev;
860 strcpy(tunnel->parms.name, dev->name);
861
862 iph->version = 4;
863 iph->protocol = IPPROTO_IPIP;
864 iph->ihl = 5;
865
866 dev_hold(dev);
867 tunnels_wc[0] = tunnel;
868 return 0;
869}
870
871static struct xfrm_tunnel ipip_handler = {
872 .handler = ipip_rcv,
873 .err_handler = ipip_err,
d2acc347 874 .priority = 1,
1da177e4
LT
875};
876
877static char banner[] __initdata =
878 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
879
10dc4c7b
PE
880static int ipip_init_net(struct net *net)
881{
882 int err;
883 struct ipip_net *ipn;
884
885 err = -ENOMEM;
886 ipn = kmalloc(sizeof(struct ipip_net), GFP_KERNEL);
887 if (ipn == NULL)
888 goto err_alloc;
889
890 err = net_assign_generic(net, ipip_net_id, ipn);
891 if (err < 0)
892 goto err_assign;
893
b9855c54
PE
894 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
895 "tunl0",
896 ipip_tunnel_setup);
897 if (!ipn->fb_tunnel_dev) {
898 err = -ENOMEM;
899 goto err_alloc_dev;
900 }
901
902 ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
903 dev_net_set(ipn->fb_tunnel_dev, net);
904
905 if ((err = register_netdev(ipn->fb_tunnel_dev)))
906 goto err_reg_dev;
907
10dc4c7b
PE
908 return 0;
909
b9855c54
PE
910err_reg_dev:
911 free_netdev(ipn->fb_tunnel_dev);
912err_alloc_dev:
913 /* nothing */
10dc4c7b
PE
914err_assign:
915 kfree(ipn);
916err_alloc:
917 return err;
918}
919
920static void ipip_exit_net(struct net *net)
921{
922 struct ipip_net *ipn;
923
924 ipn = net_generic(net, ipip_net_id);
b9855c54
PE
925 rtnl_lock();
926 unregister_netdevice(ipn->fb_tunnel_dev);
927 rtnl_unlock();
10dc4c7b
PE
928 kfree(ipn);
929}
930
931static struct pernet_operations ipip_net_ops = {
932 .init = ipip_init_net,
933 .exit = ipip_exit_net,
934};
935
1da177e4
LT
936static int __init ipip_init(void)
937{
938 int err;
939
940 printk(banner);
941
c0d56408 942 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
1da177e4
LT
943 printk(KERN_INFO "ipip init: can't register tunnel\n");
944 return -EAGAIN;
945 }
946
10dc4c7b
PE
947 err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
948 if (err)
b9855c54
PE
949 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
950
1da177e4 951 return err;
1da177e4
LT
952}
953
db44575f
AK
954static void __exit ipip_destroy_tunnels(void)
955{
956 int prio;
957
958 for (prio = 1; prio < 4; prio++) {
959 int h;
960 for (h = 0; h < HASH_SIZE; h++) {
961 struct ip_tunnel *t;
962 while ((t = tunnels[prio][h]) != NULL)
963 unregister_netdevice(t->dev);
964 }
965 }
966}
967
1da177e4
LT
968static void __exit ipip_fini(void)
969{
c0d56408 970 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1da177e4
LT
971 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
972
db44575f
AK
973 rtnl_lock();
974 ipip_destroy_tunnels();
db44575f 975 rtnl_unlock();
10dc4c7b
PE
976
977 unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
1da177e4
LT
978}
979
/* Module entry and exit points, and the module's license tag. */
980module_init(ipip_init);
981module_exit(ipip_fini);
982MODULE_LICENSE("GPL");