]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipip.c
[VLAN]: Handle vlan devices net namespace changing.
[net-next-2.6.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4
LT
3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (it's so tiny it's silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29/* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 38
1da177e4
LT
39 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
e905a9ed 45
1da177e4
LT
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 55
1da177e4
LT
56*/
57
58/* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
e905a9ed 78 find out how much more space you can allocate by calling
1da177e4
LT
79 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
89/*
90 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91
92 For comments look at net/ipv4/ip_gre.c --ANK
93 */
94
e905a9ed 95
4fc268d2 96#include <linux/capability.h>
1da177e4
LT
97#include <linux/module.h>
98#include <linux/types.h>
1da177e4
LT
99#include <linux/kernel.h>
100#include <asm/uaccess.h>
101#include <linux/skbuff.h>
102#include <linux/netdevice.h>
103#include <linux/in.h>
104#include <linux/tcp.h>
105#include <linux/udp.h>
106#include <linux/if_arp.h>
107#include <linux/mroute.h>
108#include <linux/init.h>
109#include <linux/netfilter_ipv4.h>
46f25dff 110#include <linux/if_ether.h>
1da177e4
LT
111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
1da177e4
LT
115#include <net/ipip.h>
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
118
/* Number of buckets in each hashed tunnel table (must remain a power of two). */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address (network byte order) into a bucket index in the
 * range [0, HASH_SIZE).  The argument is fully parenthesized so that a
 * compound expression is hashed as a whole, and the mask is derived from
 * HASH_SIZE so the two cannot drift apart.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1da177e4
LT
121
122static int ipip_fb_tunnel_init(struct net_device *dev);
123static int ipip_tunnel_init(struct net_device *dev);
124static void ipip_tunnel_setup(struct net_device *dev);
125
126static struct net_device *ipip_fb_tunnel_dev;
127
128static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129static struct ip_tunnel *tunnels_r[HASH_SIZE];
130static struct ip_tunnel *tunnels_l[HASH_SIZE];
131static struct ip_tunnel *tunnels_wc[1];
132static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
133
134static DEFINE_RWLOCK(ipip_lock);
135
d5a0a1e3 136static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
1da177e4
LT
137{
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
140 struct ip_tunnel *t;
141
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 return t;
146 }
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 return t;
150 }
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 return t;
154 }
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 return t;
157 return NULL;
158}
159
87d1a164 160static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
1da177e4 161{
87d1a164
YH
162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
1da177e4
LT
164 unsigned h = 0;
165 int prio = 0;
166
167 if (remote) {
168 prio |= 2;
169 h ^= HASH(remote);
170 }
171 if (local) {
172 prio |= 1;
173 h ^= HASH(local);
174 }
175 return &tunnels[prio][h];
176}
177
87d1a164
YH
178static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179{
180 return __ipip_bucket(&t->parms);
181}
1da177e4
LT
182
183static void ipip_tunnel_unlink(struct ip_tunnel *t)
184{
185 struct ip_tunnel **tp;
186
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188 if (t == *tp) {
189 write_lock_bh(&ipip_lock);
190 *tp = t->next;
191 write_unlock_bh(&ipip_lock);
192 break;
193 }
194 }
195}
196
197static void ipip_tunnel_link(struct ip_tunnel *t)
198{
199 struct ip_tunnel **tp = ipip_bucket(t);
200
201 t->next = *tp;
202 write_lock_bh(&ipip_lock);
203 *tp = t;
204 write_unlock_bh(&ipip_lock);
205}
206
207static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208{
d5a0a1e3
AV
209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
1da177e4
LT
211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
1da177e4
LT
213 char name[IFNAMSIZ];
214
87d1a164 215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 return t;
218 }
219 if (!create)
220 return NULL;
221
222 if (parms->name[0])
223 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
224 else
225 sprintf(name, "tunl%%d");
1da177e4
LT
226
227 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
228 if (dev == NULL)
229 return NULL;
230
b37d428b
PE
231 if (strchr(name, '%')) {
232 if (dev_alloc_name(dev, name) < 0)
233 goto failed_free;
234 }
235
2941a486 236 nt = netdev_priv(dev);
1da177e4
LT
237 dev->init = ipip_tunnel_init;
238 nt->parms = *parms;
239
b37d428b
PE
240 if (register_netdevice(dev) < 0)
241 goto failed_free;
1da177e4
LT
242
243 dev_hold(dev);
244 ipip_tunnel_link(nt);
1da177e4
LT
245 return nt;
246
b37d428b
PE
247failed_free:
248 free_netdev(dev);
1da177e4
LT
249 return NULL;
250}
251
252static void ipip_tunnel_uninit(struct net_device *dev)
253{
254 if (dev == ipip_fb_tunnel_dev) {
255 write_lock_bh(&ipip_lock);
256 tunnels_wc[0] = NULL;
257 write_unlock_bh(&ipip_lock);
258 } else
2941a486 259 ipip_tunnel_unlink(netdev_priv(dev));
1da177e4
LT
260 dev_put(dev);
261}
262
d2acc347 263static int ipip_err(struct sk_buff *skb, u32 info)
1da177e4
LT
264{
265#ifndef I_WISH_WORLD_WERE_PERFECT
266
267/* It is not :-( All the routers (except for Linux) return only
268 8 bytes of packet payload. It means, that precise relaying of
269 ICMP in the real Internet is absolutely infeasible.
270 */
271 struct iphdr *iph = (struct iphdr*)skb->data;
88c7664f
ACM
272 const int type = icmp_hdr(skb)->type;
273 const int code = icmp_hdr(skb)->code;
1da177e4 274 struct ip_tunnel *t;
d2acc347 275 int err;
1da177e4
LT
276
277 switch (type) {
278 default:
279 case ICMP_PARAMETERPROB:
d2acc347 280 return 0;
1da177e4
LT
281
282 case ICMP_DEST_UNREACH:
283 switch (code) {
284 case ICMP_SR_FAILED:
285 case ICMP_PORT_UNREACH:
286 /* Impossible event. */
d2acc347 287 return 0;
1da177e4
LT
288 case ICMP_FRAG_NEEDED:
289 /* Soft state for pmtu is maintained by IP core. */
d2acc347 290 return 0;
1da177e4
LT
291 default:
292 /* All others are translated to HOST_UNREACH.
293 rfc2003 contains "deep thoughts" about NET_UNREACH,
294 I believe they are just ether pollution. --ANK
295 */
296 break;
297 }
298 break;
299 case ICMP_TIME_EXCEEDED:
300 if (code != ICMP_EXC_TTL)
d2acc347 301 return 0;
1da177e4
LT
302 break;
303 }
304
d2acc347
HX
305 err = -ENOENT;
306
1da177e4
LT
307 read_lock(&ipip_lock);
308 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
309 if (t == NULL || t->parms.iph.daddr == 0)
310 goto out;
d2acc347
HX
311
312 err = 0;
1da177e4
LT
313 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
314 goto out;
315
316 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
317 t->err_count++;
318 else
319 t->err_count = 1;
320 t->err_time = jiffies;
321out:
322 read_unlock(&ipip_lock);
d2acc347 323 return err;
1da177e4
LT
324#else
325 struct iphdr *iph = (struct iphdr*)dp;
326 int hlen = iph->ihl<<2;
327 struct iphdr *eiph;
88c7664f
ACM
328 const int type = icmp_hdr(skb)->type;
329 const int code = icmp_hdr(skb)->code;
1da177e4
LT
330 int rel_type = 0;
331 int rel_code = 0;
c55e2f49
AV
332 __be32 rel_info = 0;
333 __u32 n = 0;
1da177e4
LT
334 struct sk_buff *skb2;
335 struct flowi fl;
336 struct rtable *rt;
337
338 if (len < hlen + sizeof(struct iphdr))
d2acc347 339 return 0;
1da177e4
LT
340 eiph = (struct iphdr*)(dp + hlen);
341
342 switch (type) {
343 default:
d2acc347 344 return 0;
1da177e4 345 case ICMP_PARAMETERPROB:
88c7664f 346 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
c55e2f49 347 if (n < hlen)
d2acc347 348 return 0;
1da177e4
LT
349
350 /* So... This guy found something strange INSIDE encapsulated
351 packet. Well, he is fool, but what can we do ?
352 */
353 rel_type = ICMP_PARAMETERPROB;
c55e2f49 354 rel_info = htonl((n - hlen) << 24);
1da177e4
LT
355 break;
356
357 case ICMP_DEST_UNREACH:
358 switch (code) {
359 case ICMP_SR_FAILED:
360 case ICMP_PORT_UNREACH:
361 /* Impossible event. */
d2acc347 362 return 0;
1da177e4
LT
363 case ICMP_FRAG_NEEDED:
364 /* And it is the only really necessary thing :-) */
88c7664f 365 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
c55e2f49 366 if (n < hlen+68)
d2acc347 367 return 0;
c55e2f49 368 n -= hlen;
1da177e4 369 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
c55e2f49 370 if (n > ntohs(eiph->tot_len))
d2acc347 371 return 0;
c55e2f49 372 rel_info = htonl(n);
1da177e4
LT
373 break;
374 default:
375 /* All others are translated to HOST_UNREACH.
376 rfc2003 contains "deep thoughts" about NET_UNREACH,
377 I believe, it is just ether pollution. --ANK
378 */
379 rel_type = ICMP_DEST_UNREACH;
380 rel_code = ICMP_HOST_UNREACH;
381 break;
382 }
383 break;
384 case ICMP_TIME_EXCEEDED:
385 if (code != ICMP_EXC_TTL)
d2acc347 386 return 0;
1da177e4
LT
387 break;
388 }
389
390 /* Prepare fake skb to feed it to icmp_send */
391 skb2 = skb_clone(skb, GFP_ATOMIC);
392 if (skb2 == NULL)
d2acc347 393 return 0;
1da177e4
LT
394 dst_release(skb2->dst);
395 skb2->dst = NULL;
396 skb_pull(skb2, skb->data - (u8*)eiph);
c1d2bbe1 397 skb_reset_network_header(skb2);
1da177e4
LT
398
399 /* Try to guess incoming interface */
400 memset(&fl, 0, sizeof(fl));
401 fl.fl4_daddr = eiph->saddr;
402 fl.fl4_tos = RT_TOS(eiph->tos);
403 fl.proto = IPPROTO_IPIP;
f206351a 404 if (ip_route_output_key(&init_net, &rt, &key)) {
1da177e4 405 kfree_skb(skb2);
d2acc347 406 return 0;
1da177e4
LT
407 }
408 skb2->dev = rt->u.dst.dev;
409
410 /* route "incoming" packet */
411 if (rt->rt_flags&RTCF_LOCAL) {
412 ip_rt_put(rt);
413 rt = NULL;
414 fl.fl4_daddr = eiph->daddr;
415 fl.fl4_src = eiph->saddr;
416 fl.fl4_tos = eiph->tos;
f206351a 417 if (ip_route_output_key(&init_net, &rt, &fl) ||
1da177e4
LT
418 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
419 ip_rt_put(rt);
420 kfree_skb(skb2);
d2acc347 421 return 0;
1da177e4
LT
422 }
423 } else {
424 ip_rt_put(rt);
425 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
426 skb2->dst->dev->type != ARPHRD_TUNNEL) {
427 kfree_skb(skb2);
d2acc347 428 return 0;
1da177e4
LT
429 }
430 }
431
432 /* change mtu on this route */
433 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
c55e2f49 434 if (n > dst_mtu(skb2->dst)) {
1da177e4 435 kfree_skb(skb2);
d2acc347 436 return 0;
1da177e4 437 }
c55e2f49 438 skb2->dst->ops->update_pmtu(skb2->dst, n);
1da177e4 439 } else if (type == ICMP_TIME_EXCEEDED) {
2941a486 440 struct ip_tunnel *t = netdev_priv(skb2->dev);
1da177e4
LT
441 if (t->parms.iph.ttl) {
442 rel_type = ICMP_DEST_UNREACH;
443 rel_code = ICMP_HOST_UNREACH;
444 }
445 }
446
447 icmp_send(skb2, rel_type, rel_code, rel_info);
448 kfree_skb(skb2);
d2acc347 449 return 0;
1da177e4
LT
450#endif
451}
452
eddc9ec5
ACM
453static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
454 struct sk_buff *skb)
1da177e4 455{
eddc9ec5 456 struct iphdr *inner_iph = ip_hdr(skb);
1da177e4
LT
457
458 if (INET_ECN_is_ce(outer_iph->tos))
459 IP_ECN_set_ce(inner_iph);
460}
461
462static int ipip_rcv(struct sk_buff *skb)
463{
1da177e4 464 struct ip_tunnel *tunnel;
eddc9ec5 465 const struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
466
467 read_lock(&ipip_lock);
468 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
469 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
470 read_unlock(&ipip_lock);
471 kfree_skb(skb);
472 return 0;
473 }
474
475 secpath_reset(skb);
476
b0e380b1 477 skb->mac_header = skb->network_header;
c1d2bbe1 478 skb_reset_network_header(skb);
1da177e4
LT
479 skb->protocol = htons(ETH_P_IP);
480 skb->pkt_type = PACKET_HOST;
481
482 tunnel->stat.rx_packets++;
483 tunnel->stat.rx_bytes += skb->len;
484 skb->dev = tunnel->dev;
485 dst_release(skb->dst);
486 skb->dst = NULL;
487 nf_reset(skb);
488 ipip_ecn_decapsulate(iph, skb);
489 netif_rx(skb);
490 read_unlock(&ipip_lock);
491 return 0;
492 }
493 read_unlock(&ipip_lock);
494
1da177e4
LT
495 return -1;
496}
497
498/*
499 * This function assumes it is being called from dev_queue_xmit()
500 * and that skb is filled properly by that function.
501 */
502
503static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
504{
2941a486 505 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
506 struct net_device_stats *stats = &tunnel->stat;
507 struct iphdr *tiph = &tunnel->parms.iph;
508 u8 tos = tunnel->parms.iph.tos;
d5a0a1e3 509 __be16 df = tiph->frag_off;
1da177e4
LT
510 struct rtable *rt; /* Route to the other host */
511 struct net_device *tdev; /* Device to other host */
eddc9ec5 512 struct iphdr *old_iph = ip_hdr(skb);
1da177e4 513 struct iphdr *iph; /* Our new IP header */
c2636b4d 514 unsigned int max_headroom; /* The extra header space needed */
d5a0a1e3 515 __be32 dst = tiph->daddr;
1da177e4
LT
516 int mtu;
517
518 if (tunnel->recursion++) {
519 tunnel->stat.collisions++;
520 goto tx_error;
521 }
522
523 if (skb->protocol != htons(ETH_P_IP))
524 goto tx_error;
525
526 if (tos&1)
527 tos = old_iph->tos;
528
529 if (!dst) {
530 /* NBMA tunnel */
ee6b9673 531 if ((rt = skb->rtable) == NULL) {
1da177e4
LT
532 tunnel->stat.tx_fifo_errors++;
533 goto tx_error;
534 }
535 if ((dst = rt->rt_gateway) == 0)
536 goto tx_error_icmp;
537 }
538
539 {
540 struct flowi fl = { .oif = tunnel->parms.link,
541 .nl_u = { .ip4_u =
542 { .daddr = dst,
543 .saddr = tiph->saddr,
544 .tos = RT_TOS(tos) } },
545 .proto = IPPROTO_IPIP };
f206351a 546 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
547 tunnel->stat.tx_carrier_errors++;
548 goto tx_error_icmp;
549 }
550 }
551 tdev = rt->u.dst.dev;
552
553 if (tdev == dev) {
554 ip_rt_put(rt);
555 tunnel->stat.collisions++;
556 goto tx_error;
557 }
558
559 if (tiph->frag_off)
560 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
561 else
562 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
563
564 if (mtu < 68) {
565 tunnel->stat.collisions++;
566 ip_rt_put(rt);
567 goto tx_error;
568 }
569 if (skb->dst)
570 skb->dst->ops->update_pmtu(skb->dst, mtu);
571
572 df |= (old_iph->frag_off&htons(IP_DF));
573
574 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
575 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
576 ip_rt_put(rt);
577 goto tx_error;
578 }
579
580 if (tunnel->err_count > 0) {
581 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
582 tunnel->err_count--;
583 dst_link_failure(skb);
584 } else
585 tunnel->err_count = 0;
586 }
587
588 /*
589 * Okay, now see if we can stuff it in the buffer as-is.
590 */
591 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
592
cfbba49d
PM
593 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
594 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
595 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
596 if (!new_skb) {
597 ip_rt_put(rt);
e905a9ed 598 stats->tx_dropped++;
1da177e4
LT
599 dev_kfree_skb(skb);
600 tunnel->recursion--;
601 return 0;
602 }
603 if (skb->sk)
604 skb_set_owner_w(new_skb, skb->sk);
605 dev_kfree_skb(skb);
606 skb = new_skb;
eddc9ec5 607 old_iph = ip_hdr(skb);
1da177e4
LT
608 }
609
b0e380b1 610 skb->transport_header = skb->network_header;
e2d1bca7
ACM
611 skb_push(skb, sizeof(struct iphdr));
612 skb_reset_network_header(skb);
1da177e4 613 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
614 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
615 IPSKB_REROUTED);
1da177e4
LT
616 dst_release(skb->dst);
617 skb->dst = &rt->u.dst;
618
619 /*
620 * Push down and install the IPIP header.
621 */
622
eddc9ec5 623 iph = ip_hdr(skb);
1da177e4
LT
624 iph->version = 4;
625 iph->ihl = sizeof(struct iphdr)>>2;
626 iph->frag_off = df;
627 iph->protocol = IPPROTO_IPIP;
628 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
629 iph->daddr = rt->rt_dst;
630 iph->saddr = rt->rt_src;
631
632 if ((iph->ttl = tiph->ttl) == 0)
633 iph->ttl = old_iph->ttl;
634
635 nf_reset(skb);
636
637 IPTUNNEL_XMIT();
638 tunnel->recursion--;
639 return 0;
640
641tx_error_icmp:
642 dst_link_failure(skb);
643tx_error:
644 stats->tx_errors++;
645 dev_kfree_skb(skb);
646 tunnel->recursion--;
647 return 0;
648}
649
5533995b
MS
650static void ipip_tunnel_bind_dev(struct net_device *dev)
651{
652 struct net_device *tdev = NULL;
653 struct ip_tunnel *tunnel;
654 struct iphdr *iph;
655
656 tunnel = netdev_priv(dev);
657 iph = &tunnel->parms.iph;
658
659 if (iph->daddr) {
660 struct flowi fl = { .oif = tunnel->parms.link,
661 .nl_u = { .ip4_u =
662 { .daddr = iph->daddr,
663 .saddr = iph->saddr,
664 .tos = RT_TOS(iph->tos) } },
665 .proto = IPPROTO_IPIP };
666 struct rtable *rt;
f206351a 667 if (!ip_route_output_key(&init_net, &rt, &fl)) {
5533995b
MS
668 tdev = rt->u.dst.dev;
669 ip_rt_put(rt);
670 }
671 dev->flags |= IFF_POINTOPOINT;
672 }
673
674 if (!tdev && tunnel->parms.link)
675 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
676
677 if (tdev) {
678 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
679 dev->mtu = tdev->mtu - sizeof(struct iphdr);
680 }
681 dev->iflink = tunnel->parms.link;
682}
683
1da177e4
LT
684static int
685ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
686{
687 int err = 0;
688 struct ip_tunnel_parm p;
689 struct ip_tunnel *t;
690
691 switch (cmd) {
692 case SIOCGETTUNNEL:
693 t = NULL;
694 if (dev == ipip_fb_tunnel_dev) {
695 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
696 err = -EFAULT;
697 break;
698 }
699 t = ipip_tunnel_locate(&p, 0);
700 }
701 if (t == NULL)
2941a486 702 t = netdev_priv(dev);
1da177e4
LT
703 memcpy(&p, &t->parms, sizeof(p));
704 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
705 err = -EFAULT;
706 break;
707
708 case SIOCADDTUNNEL:
709 case SIOCCHGTUNNEL:
710 err = -EPERM;
711 if (!capable(CAP_NET_ADMIN))
712 goto done;
713
714 err = -EFAULT;
715 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
716 goto done;
717
718 err = -EINVAL;
719 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
720 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
721 goto done;
722 if (p.iph.ttl)
723 p.iph.frag_off |= htons(IP_DF);
724
725 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
726
727 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
728 if (t != NULL) {
729 if (t->dev != dev) {
730 err = -EEXIST;
731 break;
732 }
733 } else {
734 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
735 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
736 err = -EINVAL;
737 break;
738 }
2941a486 739 t = netdev_priv(dev);
1da177e4
LT
740 ipip_tunnel_unlink(t);
741 t->parms.iph.saddr = p.iph.saddr;
742 t->parms.iph.daddr = p.iph.daddr;
743 memcpy(dev->dev_addr, &p.iph.saddr, 4);
744 memcpy(dev->broadcast, &p.iph.daddr, 4);
745 ipip_tunnel_link(t);
746 netdev_state_change(dev);
747 }
748 }
749
750 if (t) {
751 err = 0;
752 if (cmd == SIOCCHGTUNNEL) {
753 t->parms.iph.ttl = p.iph.ttl;
754 t->parms.iph.tos = p.iph.tos;
755 t->parms.iph.frag_off = p.iph.frag_off;
5533995b
MS
756 if (t->parms.link != p.link) {
757 t->parms.link = p.link;
758 ipip_tunnel_bind_dev(dev);
759 netdev_state_change(dev);
760 }
1da177e4
LT
761 }
762 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
763 err = -EFAULT;
764 } else
765 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
766 break;
767
768 case SIOCDELTUNNEL:
769 err = -EPERM;
770 if (!capable(CAP_NET_ADMIN))
771 goto done;
772
773 if (dev == ipip_fb_tunnel_dev) {
774 err = -EFAULT;
775 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
776 goto done;
777 err = -ENOENT;
778 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
779 goto done;
780 err = -EPERM;
781 if (t->dev == ipip_fb_tunnel_dev)
782 goto done;
783 dev = t->dev;
784 }
22f8cde5
SH
785 unregister_netdevice(dev);
786 err = 0;
1da177e4
LT
787 break;
788
789 default:
790 err = -EINVAL;
791 }
792
793done:
794 return err;
795}
796
797static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
798{
2941a486 799 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1da177e4
LT
800}
801
802static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
803{
804 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
805 return -EINVAL;
806 dev->mtu = new_mtu;
807 return 0;
808}
809
810static void ipip_tunnel_setup(struct net_device *dev)
811{
1da177e4
LT
812 dev->uninit = ipip_tunnel_uninit;
813 dev->hard_start_xmit = ipip_tunnel_xmit;
814 dev->get_stats = ipip_tunnel_get_stats;
815 dev->do_ioctl = ipip_tunnel_ioctl;
816 dev->change_mtu = ipip_tunnel_change_mtu;
817 dev->destructor = free_netdev;
818
819 dev->type = ARPHRD_TUNNEL;
820 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
46f25dff 821 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
1da177e4
LT
822 dev->flags = IFF_NOARP;
823 dev->iflink = 0;
824 dev->addr_len = 4;
825}
826
827static int ipip_tunnel_init(struct net_device *dev)
828{
1da177e4 829 struct ip_tunnel *tunnel;
1da177e4 830
2941a486 831 tunnel = netdev_priv(dev);
1da177e4
LT
832
833 tunnel->dev = dev;
834 strcpy(tunnel->parms.name, dev->name);
835
836 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
837 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
838
5533995b 839 ipip_tunnel_bind_dev(dev);
1da177e4
LT
840
841 return 0;
842}
843
844static int __init ipip_fb_tunnel_init(struct net_device *dev)
845{
2941a486 846 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
847 struct iphdr *iph = &tunnel->parms.iph;
848
849 tunnel->dev = dev;
850 strcpy(tunnel->parms.name, dev->name);
851
852 iph->version = 4;
853 iph->protocol = IPPROTO_IPIP;
854 iph->ihl = 5;
855
856 dev_hold(dev);
857 tunnels_wc[0] = tunnel;
858 return 0;
859}
860
861static struct xfrm_tunnel ipip_handler = {
862 .handler = ipip_rcv,
863 .err_handler = ipip_err,
d2acc347 864 .priority = 1,
1da177e4
LT
865};
866
867static char banner[] __initdata =
868 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
869
870static int __init ipip_init(void)
871{
872 int err;
873
874 printk(banner);
875
c0d56408 876 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
1da177e4
LT
877 printk(KERN_INFO "ipip init: can't register tunnel\n");
878 return -EAGAIN;
879 }
880
881 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
882 "tunl0",
883 ipip_tunnel_setup);
884 if (!ipip_fb_tunnel_dev) {
885 err = -ENOMEM;
886 goto err1;
887 }
888
889 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
890
891 if ((err = register_netdev(ipip_fb_tunnel_dev)))
892 goto err2;
893 out:
894 return err;
895 err2:
896 free_netdev(ipip_fb_tunnel_dev);
897 err1:
c0d56408 898 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1da177e4
LT
899 goto out;
900}
901
db44575f
AK
902static void __exit ipip_destroy_tunnels(void)
903{
904 int prio;
905
906 for (prio = 1; prio < 4; prio++) {
907 int h;
908 for (h = 0; h < HASH_SIZE; h++) {
909 struct ip_tunnel *t;
910 while ((t = tunnels[prio][h]) != NULL)
911 unregister_netdevice(t->dev);
912 }
913 }
914}
915
1da177e4
LT
916static void __exit ipip_fini(void)
917{
c0d56408 918 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1da177e4
LT
919 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
920
db44575f
AK
921 rtnl_lock();
922 ipip_destroy_tunnels();
923 unregister_netdevice(ipip_fb_tunnel_dev);
924 rtnl_unlock();
1da177e4
LT
925}
926
927module_init(ipip_init);
928module_exit(ipip_fini);
929MODULE_LICENSE("GPL");