]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipip.c
[AF_KEY]: Fix oops by converting to proc_net_*().
[net-next-2.6.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4
LT
3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29/* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 38
1da177e4
LT
39 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
e905a9ed 45
1da177e4
LT
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 55
1da177e4
LT
56*/
57
58/* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
e905a9ed 78 find out how much more space you can allocate by calling
1da177e4
LT
79 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
89/*
90 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91
92 For comments look at net/ipv4/ip_gre.c --ANK
93 */
94
e905a9ed 95
4fc268d2 96#include <linux/capability.h>
1da177e4
LT
97#include <linux/module.h>
98#include <linux/types.h>
1da177e4
LT
99#include <linux/kernel.h>
100#include <asm/uaccess.h>
101#include <linux/skbuff.h>
102#include <linux/netdevice.h>
103#include <linux/in.h>
104#include <linux/tcp.h>
105#include <linux/udp.h>
106#include <linux/if_arp.h>
107#include <linux/mroute.h>
108#include <linux/init.h>
109#include <linux/netfilter_ipv4.h>
46f25dff 110#include <linux/if_ether.h>
1da177e4
LT
111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
1da177e4
LT
115#include <net/ipip.h>
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
118
/* Number of buckets in each hashed tunnel table. */
#define HASH_SIZE 16

/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that an expression argument is
 * cast as a whole instead of only its first operand, and the mask is
 * derived from HASH_SIZE so the two cannot drift apart.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1da177e4
LT
121
/* Forward declarations for device callbacks defined later in this file. */
122static int ipip_fb_tunnel_init(struct net_device *dev);
123static int ipip_tunnel_init(struct net_device *dev);
124static void ipip_tunnel_setup(struct net_device *dev);
125
/* The fallback tunnel device ("tunl0"), allocated in ipip_init(). */
126static struct net_device *ipip_fb_tunnel_dev;
127
/*
 * Tunnel tables.  __ipip_bucket() selects a table through tunnels[prio],
 * where bit 1 (value 2) of prio is set when a remote address is configured
 * and bit 0 (value 1) when a local address is configured:
 *   tunnels_r_l - both remote and local set
 *   tunnels_r   - only remote set
 *   tunnels_l   - only local set
 *   tunnels_wc  - neither set (single wildcard slot, used by the fallback
 *                 tunnel)
 */
128static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129static struct ip_tunnel *tunnels_r[HASH_SIZE];
130static struct ip_tunnel *tunnels_l[HASH_SIZE];
131static struct ip_tunnel *tunnels_wc[1];
132static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
133
/*
 * Taken for writing while a table slot is modified and for reading around
 * lookups in ipip_rcv() and ipip_err().
 */
134static DEFINE_RWLOCK(ipip_lock);
135
d5a0a1e3 136static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
1da177e4
LT
137{
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
140 struct ip_tunnel *t;
141
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 return t;
146 }
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 return t;
150 }
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 return t;
154 }
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 return t;
157 return NULL;
158}
159
87d1a164 160static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
1da177e4 161{
87d1a164
YH
162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
1da177e4
LT
164 unsigned h = 0;
165 int prio = 0;
166
167 if (remote) {
168 prio |= 2;
169 h ^= HASH(remote);
170 }
171 if (local) {
172 prio |= 1;
173 h ^= HASH(local);
174 }
175 return &tunnels[prio][h];
176}
177
87d1a164
YH
178static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179{
180 return __ipip_bucket(&t->parms);
181}
1da177e4
LT
182
183static void ipip_tunnel_unlink(struct ip_tunnel *t)
184{
185 struct ip_tunnel **tp;
186
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188 if (t == *tp) {
189 write_lock_bh(&ipip_lock);
190 *tp = t->next;
191 write_unlock_bh(&ipip_lock);
192 break;
193 }
194 }
195}
196
197static void ipip_tunnel_link(struct ip_tunnel *t)
198{
199 struct ip_tunnel **tp = ipip_bucket(t);
200
201 t->next = *tp;
202 write_lock_bh(&ipip_lock);
203 *tp = t;
204 write_unlock_bh(&ipip_lock);
205}
206
207static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208{
d5a0a1e3
AV
209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
1da177e4
LT
211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
1da177e4
LT
213 char name[IFNAMSIZ];
214
87d1a164 215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 return t;
218 }
219 if (!create)
220 return NULL;
221
222 if (parms->name[0])
223 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
224 else
225 sprintf(name, "tunl%%d");
1da177e4
LT
226
227 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
228 if (dev == NULL)
229 return NULL;
230
2941a486 231 nt = netdev_priv(dev);
1da177e4
LT
232 dev->init = ipip_tunnel_init;
233 nt->parms = *parms;
234
235 if (register_netdevice(dev) < 0) {
236 free_netdev(dev);
237 goto failed;
238 }
239
240 dev_hold(dev);
241 ipip_tunnel_link(nt);
1da177e4
LT
242 return nt;
243
244failed:
245 return NULL;
246}
247
248static void ipip_tunnel_uninit(struct net_device *dev)
249{
250 if (dev == ipip_fb_tunnel_dev) {
251 write_lock_bh(&ipip_lock);
252 tunnels_wc[0] = NULL;
253 write_unlock_bh(&ipip_lock);
254 } else
2941a486 255 ipip_tunnel_unlink(netdev_priv(dev));
1da177e4
LT
256 dev_put(dev);
257}
258
d2acc347 259static int ipip_err(struct sk_buff *skb, u32 info)
1da177e4
LT
260{
261#ifndef I_WISH_WORLD_WERE_PERFECT
262
263/* It is not :-( All the routers (except for Linux) return only
264 8 bytes of packet payload. It means, that precise relaying of
265 ICMP in the real Internet is absolutely infeasible.
266 */
267 struct iphdr *iph = (struct iphdr*)skb->data;
88c7664f
ACM
268 const int type = icmp_hdr(skb)->type;
269 const int code = icmp_hdr(skb)->code;
1da177e4 270 struct ip_tunnel *t;
d2acc347 271 int err;
1da177e4
LT
272
273 switch (type) {
274 default:
275 case ICMP_PARAMETERPROB:
d2acc347 276 return 0;
1da177e4
LT
277
278 case ICMP_DEST_UNREACH:
279 switch (code) {
280 case ICMP_SR_FAILED:
281 case ICMP_PORT_UNREACH:
282 /* Impossible event. */
d2acc347 283 return 0;
1da177e4
LT
284 case ICMP_FRAG_NEEDED:
285 /* Soft state for pmtu is maintained by IP core. */
d2acc347 286 return 0;
1da177e4
LT
287 default:
288 /* All others are translated to HOST_UNREACH.
289 rfc2003 contains "deep thoughts" about NET_UNREACH,
290 I believe they are just ether pollution. --ANK
291 */
292 break;
293 }
294 break;
295 case ICMP_TIME_EXCEEDED:
296 if (code != ICMP_EXC_TTL)
d2acc347 297 return 0;
1da177e4
LT
298 break;
299 }
300
d2acc347
HX
301 err = -ENOENT;
302
1da177e4
LT
303 read_lock(&ipip_lock);
304 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
305 if (t == NULL || t->parms.iph.daddr == 0)
306 goto out;
d2acc347
HX
307
308 err = 0;
1da177e4
LT
309 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
310 goto out;
311
312 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
313 t->err_count++;
314 else
315 t->err_count = 1;
316 t->err_time = jiffies;
317out:
318 read_unlock(&ipip_lock);
d2acc347 319 return err;
1da177e4
LT
320#else
321 struct iphdr *iph = (struct iphdr*)dp;
322 int hlen = iph->ihl<<2;
323 struct iphdr *eiph;
88c7664f
ACM
324 const int type = icmp_hdr(skb)->type;
325 const int code = icmp_hdr(skb)->code;
1da177e4
LT
326 int rel_type = 0;
327 int rel_code = 0;
c55e2f49
AV
328 __be32 rel_info = 0;
329 __u32 n = 0;
1da177e4
LT
330 struct sk_buff *skb2;
331 struct flowi fl;
332 struct rtable *rt;
333
334 if (len < hlen + sizeof(struct iphdr))
d2acc347 335 return 0;
1da177e4
LT
336 eiph = (struct iphdr*)(dp + hlen);
337
338 switch (type) {
339 default:
d2acc347 340 return 0;
1da177e4 341 case ICMP_PARAMETERPROB:
88c7664f 342 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
c55e2f49 343 if (n < hlen)
d2acc347 344 return 0;
1da177e4
LT
345
346 /* So... This guy found something strange INSIDE encapsulated
347 packet. Well, he is fool, but what can we do ?
348 */
349 rel_type = ICMP_PARAMETERPROB;
c55e2f49 350 rel_info = htonl((n - hlen) << 24);
1da177e4
LT
351 break;
352
353 case ICMP_DEST_UNREACH:
354 switch (code) {
355 case ICMP_SR_FAILED:
356 case ICMP_PORT_UNREACH:
357 /* Impossible event. */
d2acc347 358 return 0;
1da177e4
LT
359 case ICMP_FRAG_NEEDED:
360 /* And it is the only really necessary thing :-) */
88c7664f 361 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
c55e2f49 362 if (n < hlen+68)
d2acc347 363 return 0;
c55e2f49 364 n -= hlen;
1da177e4 365 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
c55e2f49 366 if (n > ntohs(eiph->tot_len))
d2acc347 367 return 0;
c55e2f49 368 rel_info = htonl(n);
1da177e4
LT
369 break;
370 default:
371 /* All others are translated to HOST_UNREACH.
372 rfc2003 contains "deep thoughts" about NET_UNREACH,
373 I believe, it is just ether pollution. --ANK
374 */
375 rel_type = ICMP_DEST_UNREACH;
376 rel_code = ICMP_HOST_UNREACH;
377 break;
378 }
379 break;
380 case ICMP_TIME_EXCEEDED:
381 if (code != ICMP_EXC_TTL)
d2acc347 382 return 0;
1da177e4
LT
383 break;
384 }
385
386 /* Prepare fake skb to feed it to icmp_send */
387 skb2 = skb_clone(skb, GFP_ATOMIC);
388 if (skb2 == NULL)
d2acc347 389 return 0;
1da177e4
LT
390 dst_release(skb2->dst);
391 skb2->dst = NULL;
392 skb_pull(skb2, skb->data - (u8*)eiph);
c1d2bbe1 393 skb_reset_network_header(skb2);
1da177e4
LT
394
395 /* Try to guess incoming interface */
396 memset(&fl, 0, sizeof(fl));
397 fl.fl4_daddr = eiph->saddr;
398 fl.fl4_tos = RT_TOS(eiph->tos);
399 fl.proto = IPPROTO_IPIP;
f206351a 400 if (ip_route_output_key(&init_net, &rt, &key)) {
1da177e4 401 kfree_skb(skb2);
d2acc347 402 return 0;
1da177e4
LT
403 }
404 skb2->dev = rt->u.dst.dev;
405
406 /* route "incoming" packet */
407 if (rt->rt_flags&RTCF_LOCAL) {
408 ip_rt_put(rt);
409 rt = NULL;
410 fl.fl4_daddr = eiph->daddr;
411 fl.fl4_src = eiph->saddr;
412 fl.fl4_tos = eiph->tos;
f206351a 413 if (ip_route_output_key(&init_net, &rt, &fl) ||
1da177e4
LT
414 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
415 ip_rt_put(rt);
416 kfree_skb(skb2);
d2acc347 417 return 0;
1da177e4
LT
418 }
419 } else {
420 ip_rt_put(rt);
421 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
422 skb2->dst->dev->type != ARPHRD_TUNNEL) {
423 kfree_skb(skb2);
d2acc347 424 return 0;
1da177e4
LT
425 }
426 }
427
428 /* change mtu on this route */
429 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
c55e2f49 430 if (n > dst_mtu(skb2->dst)) {
1da177e4 431 kfree_skb(skb2);
d2acc347 432 return 0;
1da177e4 433 }
c55e2f49 434 skb2->dst->ops->update_pmtu(skb2->dst, n);
1da177e4 435 } else if (type == ICMP_TIME_EXCEEDED) {
2941a486 436 struct ip_tunnel *t = netdev_priv(skb2->dev);
1da177e4
LT
437 if (t->parms.iph.ttl) {
438 rel_type = ICMP_DEST_UNREACH;
439 rel_code = ICMP_HOST_UNREACH;
440 }
441 }
442
443 icmp_send(skb2, rel_type, rel_code, rel_info);
444 kfree_skb(skb2);
d2acc347 445 return 0;
1da177e4
LT
446#endif
447}
448
eddc9ec5
ACM
449static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
450 struct sk_buff *skb)
1da177e4 451{
eddc9ec5 452 struct iphdr *inner_iph = ip_hdr(skb);
1da177e4
LT
453
454 if (INET_ECN_is_ce(outer_iph->tos))
455 IP_ECN_set_ce(inner_iph);
456}
457
458static int ipip_rcv(struct sk_buff *skb)
459{
1da177e4 460 struct ip_tunnel *tunnel;
eddc9ec5 461 const struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
462
463 read_lock(&ipip_lock);
464 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
465 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
466 read_unlock(&ipip_lock);
467 kfree_skb(skb);
468 return 0;
469 }
470
471 secpath_reset(skb);
472
b0e380b1 473 skb->mac_header = skb->network_header;
c1d2bbe1 474 skb_reset_network_header(skb);
1da177e4
LT
475 skb->protocol = htons(ETH_P_IP);
476 skb->pkt_type = PACKET_HOST;
477
478 tunnel->stat.rx_packets++;
479 tunnel->stat.rx_bytes += skb->len;
480 skb->dev = tunnel->dev;
481 dst_release(skb->dst);
482 skb->dst = NULL;
483 nf_reset(skb);
484 ipip_ecn_decapsulate(iph, skb);
485 netif_rx(skb);
486 read_unlock(&ipip_lock);
487 return 0;
488 }
489 read_unlock(&ipip_lock);
490
1da177e4
LT
491 return -1;
492}
493
494/*
495 * This function assumes it is being called from dev_queue_xmit()
496 * and that skb is filled properly by that function.
497 */
498
/*
 * Transmit callback of the tunnel device: wrap the IPv4 packet in skb in
 * a new outer IPv4 header addressed to the tunnel endpoint and send it.
 *
 * The skb is always consumed (sent or freed) and the function always
 * returns 0; failures are reported through the tunnel statistics.
 */
499static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
500{
2941a486 501 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
502 struct net_device_stats *stats = &tunnel->stat;
503 struct iphdr *tiph = &tunnel->parms.iph;
504 u8 tos = tunnel->parms.iph.tos;
d5a0a1e3 505 __be16 df = tiph->frag_off;
1da177e4
LT
506 struct rtable *rt; /* Route to the other host */
507 struct net_device *tdev; /* Device to other host */
eddc9ec5 508 struct iphdr *old_iph = ip_hdr(skb);
1da177e4 509 struct iphdr *iph; /* Our new IP header */
c2636b4d 510 unsigned int max_headroom; /* The extra header space needed */
d5a0a1e3 511 __be32 dst = tiph->daddr;
1da177e4
LT
512 int mtu;
513
/* Re-entry guard: a non-zero counter means we are already transmitting
   on this tunnel; count it as a collision and drop. */
514 if (tunnel->recursion++) {
515 tunnel->stat.collisions++;
516 goto tx_error;
517 }
518
/* Only IPv4 payloads are encapsulated. */
519 if (skb->protocol != htons(ETH_P_IP))
520 goto tx_error;
521
/* Low bit of the configured TOS set: use the inner packet's TOS. */
522 if (tos&1)
523 tos = old_iph->tos;
524
525 if (!dst) {
526 /* NBMA tunnel */
/* No fixed remote: take the gateway of the route attached to skb. */
527 if ((rt = (struct rtable*)skb->dst) == NULL) {
528 tunnel->stat.tx_fifo_errors++;
529 goto tx_error;
530 }
531 if ((dst = rt->rt_gateway) == 0)
532 goto tx_error_icmp;
533 }
534
535 {
536 struct flowi fl = { .oif = tunnel->parms.link,
537 .nl_u = { .ip4_u =
538 { .daddr = dst,
539 .saddr = tiph->saddr,
540 .tos = RT_TOS(tos) } },
541 .proto = IPPROTO_IPIP };
f206351a 542 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
543 tunnel->stat.tx_carrier_errors++;
544 goto tx_error_icmp;
545 }
546 }
547 tdev = rt->u.dst.dev;
548
/* The route to the endpoint leads back through this tunnel device. */
549 if (tdev == dev) {
550 ip_rt_put(rt);
551 tunnel->stat.collisions++;
552 goto tx_error;
553 }
554
555 if (tiph->frag_off)
556 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
557 else
558 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
559
560 if (mtu < 68) {
561 tunnel->stat.collisions++;
562 ip_rt_put(rt);
563 goto tx_error;
564 }
565 if (skb->dst)
566 skb->dst->ops->update_pmtu(skb->dst, mtu);
567
/* The outer header carries DF if the tunnel or the inner packet sets it. */
568 df |= (old_iph->frag_off&htons(IP_DF));
569
570 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
571 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
572 ip_rt_put(rt);
573 goto tx_error;
574 }
575
/* Errors recorded by ipip_err(): while they are recent, report one link
   failure per transmitted packet; otherwise forget them. */
576 if (tunnel->err_count > 0) {
577 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
578 tunnel->err_count--;
579 dst_link_failure(skb);
580 } else
581 tunnel->err_count = 0;
582 }
583
584 /*
585 * Okay, now see if we can stuff it in the buffer as-is.
586 */
587 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
588
cfbba49d
PM
589 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
590 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
591 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
592 if (!new_skb) {
593 ip_rt_put(rt);
e905a9ed 594 stats->tx_dropped++;
1da177e4
LT
595 dev_kfree_skb(skb);
596 tunnel->recursion--;
597 return 0;
598 }
599 if (skb->sk)
600 skb_set_owner_w(new_skb, skb->sk);
601 dev_kfree_skb(skb);
602 skb = new_skb;
/* The header pointer must be re-read from the replacement buffer. */
eddc9ec5 603 old_iph = ip_hdr(skb);
1da177e4
LT
604 }
605
/* The inner IP header becomes the transport header of the outer packet. */
b0e380b1 606 skb->transport_header = skb->network_header;
e2d1bca7
ACM
607 skb_push(skb, sizeof(struct iphdr));
608 skb_reset_network_header(skb);
1da177e4 609 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
610 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
611 IPSKB_REROUTED);
1da177e4
LT
612 dst_release(skb->dst);
613 skb->dst = &rt->u.dst;
614
615 /*
616 * Push down and install the IPIP header.
617 */
618
eddc9ec5 619 iph = ip_hdr(skb);
1da177e4
LT
620 iph->version = 4;
621 iph->ihl = sizeof(struct iphdr)>>2;
622 iph->frag_off = df;
623 iph->protocol = IPPROTO_IPIP;
624 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
625 iph->daddr = rt->rt_dst;
626 iph->saddr = rt->rt_src;
627
/* A configured TTL of 0 means: inherit the inner packet's TTL. */
628 if ((iph->ttl = tiph->ttl) == 0)
629 iph->ttl = old_iph->ttl;
630
631 nf_reset(skb);
632
/* NOTE(review): IPTUNNEL_XMIT() is defined outside this file; presumably
   it hands skb to the IP output path and updates stats using the local
   variables of this function -- confirm before renaming any local. */
633 IPTUNNEL_XMIT();
634 tunnel->recursion--;
635 return 0;
636
637tx_error_icmp:
638 dst_link_failure(skb);
639tx_error:
640 stats->tx_errors++;
641 dev_kfree_skb(skb);
642 tunnel->recursion--;
643 return 0;
644}
645
5533995b
MS
646static void ipip_tunnel_bind_dev(struct net_device *dev)
647{
648 struct net_device *tdev = NULL;
649 struct ip_tunnel *tunnel;
650 struct iphdr *iph;
651
652 tunnel = netdev_priv(dev);
653 iph = &tunnel->parms.iph;
654
655 if (iph->daddr) {
656 struct flowi fl = { .oif = tunnel->parms.link,
657 .nl_u = { .ip4_u =
658 { .daddr = iph->daddr,
659 .saddr = iph->saddr,
660 .tos = RT_TOS(iph->tos) } },
661 .proto = IPPROTO_IPIP };
662 struct rtable *rt;
f206351a 663 if (!ip_route_output_key(&init_net, &rt, &fl)) {
5533995b
MS
664 tdev = rt->u.dst.dev;
665 ip_rt_put(rt);
666 }
667 dev->flags |= IFF_POINTOPOINT;
668 }
669
670 if (!tdev && tunnel->parms.link)
671 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
672
673 if (tdev) {
674 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
675 dev->mtu = tdev->mtu - sizeof(struct iphdr);
676 }
677 dev->iflink = tunnel->parms.link;
678}
679
1da177e4
LT
/*
 * Tunnel ioctl handler.
 *
 * SIOCGETTUNNEL: copy tunnel parameters to user space.  On the fallback
 *   device the tunnel is looked up from the user-supplied parameters,
 *   falling back to the device's own tunnel when none matches.
 * SIOCADDTUNNEL / SIOCCHGTUNNEL: need CAP_NET_ADMIN; validate the
 *   supplied header template, then create a tunnel or change an
 *   existing one.
 * SIOCDELTUNNEL: needs CAP_NET_ADMIN; unregister the device.  On the
 *   fallback device the victim is looked up from the user-supplied
 *   parameters and the fallback device itself is refused.
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EPERM, -EINVAL,
 * -EEXIST, -ENOBUFS, -ENOENT).
 */
680static int
681ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
682{
683 int err = 0;
684 struct ip_tunnel_parm p;
685 struct ip_tunnel *t;
686
687 switch (cmd) {
688 case SIOCGETTUNNEL:
689 t = NULL;
690 if (dev == ipip_fb_tunnel_dev) {
691 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
692 err = -EFAULT;
693 break;
694 }
695 t = ipip_tunnel_locate(&p, 0);
696 }
697 if (t == NULL)
2941a486 698 t = netdev_priv(dev);
1da177e4
LT
699 memcpy(&p, &t->parms, sizeof(p));
700 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
701 err = -EFAULT;
702 break;
703
704 case SIOCADDTUNNEL:
705 case SIOCCHGTUNNEL:
706 err = -EPERM;
707 if (!capable(CAP_NET_ADMIN))
708 goto done;
709
710 err = -EFAULT;
711 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
712 goto done;
713
/* Header template must be plain IPv4/IPIP without options; DF is the
   only fragment bit that may be set. */
714 err = -EINVAL;
715 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
716 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
717 goto done;
/* A fixed TTL forces DF on the outer header. */
718 if (p.iph.ttl)
719 p.iph.frag_off |= htons(IP_DF);
720
721 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
722
723 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
724 if (t != NULL) {
/* The requested address pair already belongs to another device. */
725 if (t->dev != dev) {
726 err = -EEXIST;
727 break;
728 }
729 } else {
/* A change may not turn a point-to-point tunnel into one without a
   remote address, or the reverse. */
730 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
731 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
732 err = -EINVAL;
733 break;
734 }
2941a486 735 t = netdev_priv(dev);
1da177e4
LT
/* The addresses select the table slot, so re-hash around the update. */
736 ipip_tunnel_unlink(t);
737 t->parms.iph.saddr = p.iph.saddr;
738 t->parms.iph.daddr = p.iph.daddr;
739 memcpy(dev->dev_addr, &p.iph.saddr, 4);
740 memcpy(dev->broadcast, &p.iph.daddr, 4);
741 ipip_tunnel_link(t);
742 netdev_state_change(dev);
743 }
744 }
745
746 if (t) {
747 err = 0;
748 if (cmd == SIOCCHGTUNNEL) {
749 t->parms.iph.ttl = p.iph.ttl;
750 t->parms.iph.tos = p.iph.tos;
751 t->parms.iph.frag_off = p.iph.frag_off;
5533995b
MS
/* NOTE(review): the rebind and notification use dev, not t->dev; when
   the ioctl is issued on the fallback device these can differ --
   confirm this is intended. */
752 if (t->parms.link != p.link) {
753 t->parms.link = p.link;
754 ipip_tunnel_bind_dev(dev);
755 netdev_state_change(dev);
756 }
1da177e4
LT
757 }
758 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
759 err = -EFAULT;
760 } else
761 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
762 break;
763
764 case SIOCDELTUNNEL:
765 err = -EPERM;
766 if (!capable(CAP_NET_ADMIN))
767 goto done;
768
769 if (dev == ipip_fb_tunnel_dev) {
770 err = -EFAULT;
771 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
772 goto done;
773 err = -ENOENT;
774 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
775 goto done;
/* The fallback device itself cannot be deleted through this ioctl. */
776 err = -EPERM;
777 if (t->dev == ipip_fb_tunnel_dev)
778 goto done;
779 dev = t->dev;
780 }
22f8cde5
SH
781 unregister_netdevice(dev);
782 err = 0;
1da177e4
LT
783 break;
784
785 default:
786 err = -EINVAL;
787 }
788
789done:
790 return err;
791}
792
793static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
794{
2941a486 795 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1da177e4
LT
796}
797
798static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
799{
800 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
801 return -EINVAL;
802 dev->mtu = new_mtu;
803 return 0;
804}
805
806static void ipip_tunnel_setup(struct net_device *dev)
807{
1da177e4
LT
808 dev->uninit = ipip_tunnel_uninit;
809 dev->hard_start_xmit = ipip_tunnel_xmit;
810 dev->get_stats = ipip_tunnel_get_stats;
811 dev->do_ioctl = ipip_tunnel_ioctl;
812 dev->change_mtu = ipip_tunnel_change_mtu;
813 dev->destructor = free_netdev;
814
815 dev->type = ARPHRD_TUNNEL;
816 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
46f25dff 817 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
1da177e4
LT
818 dev->flags = IFF_NOARP;
819 dev->iflink = 0;
820 dev->addr_len = 4;
821}
822
823static int ipip_tunnel_init(struct net_device *dev)
824{
1da177e4 825 struct ip_tunnel *tunnel;
1da177e4 826
2941a486 827 tunnel = netdev_priv(dev);
1da177e4
LT
828
829 tunnel->dev = dev;
830 strcpy(tunnel->parms.name, dev->name);
831
832 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
833 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
834
5533995b 835 ipip_tunnel_bind_dev(dev);
1da177e4
LT
836
837 return 0;
838}
839
840static int __init ipip_fb_tunnel_init(struct net_device *dev)
841{
2941a486 842 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
843 struct iphdr *iph = &tunnel->parms.iph;
844
845 tunnel->dev = dev;
846 strcpy(tunnel->parms.name, dev->name);
847
848 iph->version = 4;
849 iph->protocol = IPPROTO_IPIP;
850 iph->ihl = 5;
851
852 dev_hold(dev);
853 tunnels_wc[0] = tunnel;
854 return 0;
855}
856
857static struct xfrm_tunnel ipip_handler = {
858 .handler = ipip_rcv,
859 .err_handler = ipip_err,
d2acc347 860 .priority = 1,
1da177e4
LT
861};
862
/* Message printed once by ipip_init(); lives in init data. */
863static char banner[] __initdata =
864 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
865
866static int __init ipip_init(void)
867{
868 int err;
869
870 printk(banner);
871
c0d56408 872 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
1da177e4
LT
873 printk(KERN_INFO "ipip init: can't register tunnel\n");
874 return -EAGAIN;
875 }
876
877 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
878 "tunl0",
879 ipip_tunnel_setup);
880 if (!ipip_fb_tunnel_dev) {
881 err = -ENOMEM;
882 goto err1;
883 }
884
885 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
886
887 if ((err = register_netdev(ipip_fb_tunnel_dev)))
888 goto err2;
889 out:
890 return err;
891 err2:
892 free_netdev(ipip_fb_tunnel_dev);
893 err1:
c0d56408 894 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1da177e4
LT
895 goto out;
896}
897
db44575f
AK
898static void __exit ipip_destroy_tunnels(void)
899{
900 int prio;
901
902 for (prio = 1; prio < 4; prio++) {
903 int h;
904 for (h = 0; h < HASH_SIZE; h++) {
905 struct ip_tunnel *t;
906 while ((t = tunnels[prio][h]) != NULL)
907 unregister_netdevice(t->dev);
908 }
909 }
910}
911
1da177e4
LT
912static void __exit ipip_fini(void)
913{
c0d56408 914 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1da177e4
LT
915 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
916
db44575f
AK
917 rtnl_lock();
918 ipip_destroy_tunnels();
919 unregister_netdevice(ipip_fb_tunnel_dev);
920 rtnl_unlock();
1da177e4
LT
921}
922
/* Module entry/exit points and license declaration. */
923module_init(ipip_init);
924module_exit(ipip_fini);
925MODULE_LICENSE("GPL");