]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipip.c
[IPIP]: Use proper net in hash-lookup functions.
[net-next-2.6.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4
LT
3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29/* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 38
1da177e4
LT
39 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
e905a9ed 45
1da177e4
LT
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 55
1da177e4
LT
56*/
57
58/* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
e905a9ed 78 find out how much more space you can allocate by calling
1da177e4
LT
79 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
89/*
90 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
91
92 For comments look at net/ipv4/ip_gre.c --ANK
93 */
94
e905a9ed 95
4fc268d2 96#include <linux/capability.h>
1da177e4
LT
97#include <linux/module.h>
98#include <linux/types.h>
1da177e4
LT
99#include <linux/kernel.h>
100#include <asm/uaccess.h>
101#include <linux/skbuff.h>
102#include <linux/netdevice.h>
103#include <linux/in.h>
104#include <linux/tcp.h>
105#include <linux/udp.h>
106#include <linux/if_arp.h>
107#include <linux/mroute.h>
108#include <linux/init.h>
109#include <linux/netfilter_ipv4.h>
46f25dff 110#include <linux/if_ether.h>
1da177e4
LT
111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
1da177e4
LT
115#include <net/ipip.h>
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
10dc4c7b
PE
118#include <net/net_namespace.h>
119#include <net/netns/generic.h>
1da177e4
LT
120
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE): the address
 * is XORed with itself shifted right by four bits and the low bits are kept.
 *
 * The argument is fully parenthesized so that an expression argument such
 * as HASH(a | b) hashes the value of the whole expression instead of being
 * re-associated by operator precedence.  The mask is derived from HASH_SIZE,
 * which therefore has to stay a power of two.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1da177e4 123
10dc4c7b
PE
/*
 * Identifier of this driver's per-network-namespace slot.  It is filled in
 * by register_pernet_gen_device() in ipip_init() and used as the key for
 * net_generic() / net_assign_generic().
 */
static int ipip_net_id;
/* Per-network-namespace state of the IPIP driver. */
struct ipip_net {
	/* Fallback device, allocated under the name "tunl0" in ipip_init_net(). */
	struct net_device *fb_tunnel_dev;
};

/* Forward declarations of the netdevice callbacks defined further down. */
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/*
 * Tunnel hash tables, one per combination of configured endpoints:
 *   tunnels_r_l - both remote and local address set
 *   tunnels_r   - only remote address set
 *   tunnels_l   - only local address set
 *   tunnels_wc  - neither set (single wildcard slot)
 *
 * tunnels[] is indexed by the "prio" value computed in __ipip_bucket():
 * bit 1 (value 2) is set when a remote address is configured, bit 0
 * (value 1) when a local address is configured.
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/*
 * Taken for writing around every update of a bucket pointer
 * (ipip_tunnel_link / ipip_tunnel_unlink / ipip_tunnel_uninit) and for
 * reading around lookups in ipip_rcv() and ipip_err().
 */
static DEFINE_RWLOCK(ipip_lock);
140
b9fae5c9
PE
141static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
142 __be32 remote, __be32 local)
1da177e4
LT
143{
144 unsigned h0 = HASH(remote);
145 unsigned h1 = HASH(local);
146 struct ip_tunnel *t;
147
148 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
149 if (local == t->parms.iph.saddr &&
150 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151 return t;
152 }
153 for (t = tunnels_r[h0]; t; t = t->next) {
154 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
155 return t;
156 }
157 for (t = tunnels_l[h1]; t; t = t->next) {
158 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
159 return t;
160 }
161 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
162 return t;
163 return NULL;
164}
165
b9fae5c9
PE
166static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
167 struct ip_tunnel_parm *parms)
1da177e4 168{
87d1a164
YH
169 __be32 remote = parms->iph.daddr;
170 __be32 local = parms->iph.saddr;
1da177e4
LT
171 unsigned h = 0;
172 int prio = 0;
173
174 if (remote) {
175 prio |= 2;
176 h ^= HASH(remote);
177 }
178 if (local) {
179 prio |= 1;
180 h ^= HASH(local);
181 }
182 return &tunnels[prio][h];
183}
184
b9fae5c9
PE
185static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
186 struct ip_tunnel *t)
87d1a164 187{
b9fae5c9 188 return __ipip_bucket(ipn, &t->parms);
87d1a164 189}
1da177e4 190
b9fae5c9 191static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
1da177e4
LT
192{
193 struct ip_tunnel **tp;
194
b9fae5c9 195 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
196 if (t == *tp) {
197 write_lock_bh(&ipip_lock);
198 *tp = t->next;
199 write_unlock_bh(&ipip_lock);
200 break;
201 }
202 }
203}
204
b9fae5c9 205static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
1da177e4 206{
b9fae5c9 207 struct ip_tunnel **tp = ipip_bucket(ipn, t);
1da177e4
LT
208
209 t->next = *tp;
210 write_lock_bh(&ipip_lock);
211 *tp = t;
212 write_unlock_bh(&ipip_lock);
213}
214
b9fae5c9
PE
215static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
216 struct ip_tunnel_parm *parms, int create)
1da177e4 217{
d5a0a1e3
AV
218 __be32 remote = parms->iph.daddr;
219 __be32 local = parms->iph.saddr;
1da177e4
LT
220 struct ip_tunnel *t, **tp, *nt;
221 struct net_device *dev;
1da177e4 222 char name[IFNAMSIZ];
b9fae5c9 223 struct ipip_net *ipn = net_generic(net, ipip_net_id);
1da177e4 224
b9fae5c9 225 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
226 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
227 return t;
228 }
229 if (!create)
230 return NULL;
231
232 if (parms->name[0])
233 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
234 else
235 sprintf(name, "tunl%%d");
1da177e4
LT
236
237 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
238 if (dev == NULL)
239 return NULL;
240
b37d428b
PE
241 if (strchr(name, '%')) {
242 if (dev_alloc_name(dev, name) < 0)
243 goto failed_free;
244 }
245
2941a486 246 nt = netdev_priv(dev);
1da177e4
LT
247 dev->init = ipip_tunnel_init;
248 nt->parms = *parms;
249
b37d428b
PE
250 if (register_netdevice(dev) < 0)
251 goto failed_free;
1da177e4
LT
252
253 dev_hold(dev);
b9fae5c9 254 ipip_tunnel_link(ipn, nt);
1da177e4
LT
255 return nt;
256
b37d428b
PE
257failed_free:
258 free_netdev(dev);
1da177e4
LT
259 return NULL;
260}
261
262static void ipip_tunnel_uninit(struct net_device *dev)
263{
b9855c54
PE
264 struct net *net = dev_net(dev);
265 struct ipip_net *ipn = net_generic(net, ipip_net_id);
266
267 if (dev == ipn->fb_tunnel_dev) {
1da177e4
LT
268 write_lock_bh(&ipip_lock);
269 tunnels_wc[0] = NULL;
270 write_unlock_bh(&ipip_lock);
271 } else
b9fae5c9 272 ipip_tunnel_unlink(ipn, netdev_priv(dev));
1da177e4
LT
273 dev_put(dev);
274}
275
/*
 * ICMP error handler for IPIP (installed as ipip_handler.err_handler).
 *
 * In the build that is actually compiled (I_WISH_WORLD_WERE_PERFECT is not
 * defined) the handler only does bookkeeping: for the ICMP errors it cares
 * about it finds the tunnel and updates its err_count / err_time, which
 * ipip_tunnel_xmit() later reads.
 *
 * Returns 0 when the ICMP message is ignored or has been accounted, and
 * -ENOENT when no tunnel matches or the matching tunnel has no fixed remote
 * address (parms.iph.daddr == 0).  @info is not used.
 *
 * NOTE(review): the #else branch is dead code.  It references identifiers
 * that are not declared anywhere in this function (dp, len, key), so it
 * would not compile if I_WISH_WORLD_WERE_PERFECT were ever defined.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	/* IP header found at skb->data; its addresses key the tunnel lookup. */
	struct iphdr *iph = (struct iphdr*)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	/* Filter: only some unreachable codes and TTL-exceeded fall through. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip_lock);
	/*
	 * Note the argument order: the header's destination is passed as the
	 * tunnel's remote and its source as the tunnel's local address, the
	 * reverse of ipip_rcv() (presumably because this header belongs to a
	 * packet we sent - confirm against the ICMP core).
	 */
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* TTL-exceeded is not counted for tunnels with no configured TTL. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors that arrive within IPTUNNEL_ERR_TIMEO of the previous
	 * one; otherwise start the count over at 1. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return err;
#else
	/* Dead branch: would relay the ICMP error to the inner packet's sender. */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return 0;
	/* Encapsulated (inner) IP header follows the outer one. */
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return 0;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < hlen)
			return 0;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = htonl((n - hlen) << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < hlen+68)
				return 0;
			n -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return 0;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return 0;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_daddr = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	/* NOTE(review): "key" is undeclared here; the flow built above is "fl". */
	if (ip_route_output_key(&init_net, &rt, &key)) {
		kfree_skb(skb2);
		return 0;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_daddr = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&init_net, &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return 0;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return 0;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return 0;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return 0;
#endif
}
465
eddc9ec5
ACM
466static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
467 struct sk_buff *skb)
1da177e4 468{
eddc9ec5 469 struct iphdr *inner_iph = ip_hdr(skb);
1da177e4
LT
470
471 if (INET_ECN_is_ce(outer_iph->tos))
472 IP_ECN_set_ce(inner_iph);
473}
474
475static int ipip_rcv(struct sk_buff *skb)
476{
1da177e4 477 struct ip_tunnel *tunnel;
eddc9ec5 478 const struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
479
480 read_lock(&ipip_lock);
cec3ffae 481 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
b9fae5c9 482 iph->saddr, iph->daddr)) != NULL) {
1da177e4
LT
483 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
484 read_unlock(&ipip_lock);
485 kfree_skb(skb);
486 return 0;
487 }
488
489 secpath_reset(skb);
490
b0e380b1 491 skb->mac_header = skb->network_header;
c1d2bbe1 492 skb_reset_network_header(skb);
1da177e4
LT
493 skb->protocol = htons(ETH_P_IP);
494 skb->pkt_type = PACKET_HOST;
495
496 tunnel->stat.rx_packets++;
497 tunnel->stat.rx_bytes += skb->len;
498 skb->dev = tunnel->dev;
499 dst_release(skb->dst);
500 skb->dst = NULL;
501 nf_reset(skb);
502 ipip_ecn_decapsulate(iph, skb);
503 netif_rx(skb);
504 read_unlock(&ipip_lock);
505 return 0;
506 }
507 read_unlock(&ipip_lock);
508
1da177e4
LT
509 return -1;
510}
511
512/*
513 * This function assumes it is being called from dev_queue_xmit()
514 * and that skb is filled properly by that function.
515 */
516
/*
 * hard_start_xmit callback: encapsulate an IPv4 packet in a new outer IPv4
 * header (protocol IPPROTO_IPIP) and send it towards the tunnel's remote
 * endpoint.
 *
 * Every visible path returns 0; on any failure the skb is freed here and
 * the relevant counter in the tunnel's statistics is incremented.
 * tunnel->recursion is incremented on entry and decremented on every exit
 * path; a non-zero value on entry is treated as a collision.
 *
 * NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this file.  It
 * takes no arguments, so it presumably picks up locals of this function by
 * name - check its definition before renaming or removing any of them.
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;	/* configured outer header template */
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);	/* inner header of the packet being sent */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int    mtu;

	/* Already inside this function for this tunnel: refuse to nest. */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* Only IPv4 payloads are encapsulated. */
	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Low bit of the configured TOS: use the inner packet's TOS instead. */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		/* No fixed remote: take the gateway of the route on the skb. */
		if ((rt = skb->rtable) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	/* Route the outer packet. */
	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	/* The route leads back into this very tunnel device. */
	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/*
	 * With a non-zero configured frag_off the usable MTU is the outer
	 * route's MTU minus the added header; otherwise it is taken from the
	 * skb's own dst, or from the tunnel device when the skb has none.
	 */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	/* The outer header inherits DF from the inner one. */
	df |= (old_iph->frag_off&htons(IP_DF));

	/* Inner packet has DF set and does not fit: report the MTU back. */
	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	/*
	 * Errors recorded by ipip_err(): while they are recent, consume one
	 * per transmitted packet and signal a link failure for this skb;
	 * once they are older than IPTUNNEL_ERR_TIMEO, forget them.
	 */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	/* Not enough headroom, or the buffer is not ours to modify: reallocate. */
	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		/* The inner header now lives in the new buffer. */
		old_iph = ip_hdr(skb);
	}

	/* The inner IP header becomes the transport header of the outer packet. */
	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	/* The skb takes over the route reference obtained above. */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	/* A configured TTL of 0 means: copy the inner packet's TTL. */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
663
5533995b
MS
664static void ipip_tunnel_bind_dev(struct net_device *dev)
665{
666 struct net_device *tdev = NULL;
667 struct ip_tunnel *tunnel;
668 struct iphdr *iph;
669
670 tunnel = netdev_priv(dev);
671 iph = &tunnel->parms.iph;
672
673 if (iph->daddr) {
674 struct flowi fl = { .oif = tunnel->parms.link,
675 .nl_u = { .ip4_u =
676 { .daddr = iph->daddr,
677 .saddr = iph->saddr,
678 .tos = RT_TOS(iph->tos) } },
679 .proto = IPPROTO_IPIP };
680 struct rtable *rt;
f206351a 681 if (!ip_route_output_key(&init_net, &rt, &fl)) {
5533995b
MS
682 tdev = rt->u.dst.dev;
683 ip_rt_put(rt);
684 }
685 dev->flags |= IFF_POINTOPOINT;
686 }
687
688 if (!tdev && tunnel->parms.link)
689 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
690
691 if (tdev) {
692 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
693 dev->mtu = tdev->mtu - sizeof(struct iphdr);
694 }
695 dev->iflink = tunnel->parms.link;
696}
697
1da177e4
LT
/*
 * do_ioctl callback implementing the tunnel management ioctls.
 *
 * The user buffer at ifr->ifr_ifru.ifru_data is read and/or written as a
 * struct ip_tunnel_parm.
 *
 *   SIOCGETTUNNEL - copy a tunnel's parameters to user space.
 *   SIOCADDTUNNEL - create a tunnel with the supplied parameters.
 *   SIOCCHGTUNNEL - change an existing tunnel's parameters.
 *   SIOCDELTUNNEL - unregister a tunnel device.
 *
 * ADD, CHG and DEL require CAP_NET_ADMIN.  When the ioctl is issued on the
 * namespace's fallback device, the tunnel to act on is selected by the
 * endpoints in the user-supplied parameters rather than by @dev.
 *
 * Returns 0 on success or a negative errno: -EPERM, -EFAULT, -EINVAL,
 * -EEXIST, -ENOBUFS or -ENOENT as set on the individual paths below.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device the caller names the tunnel via p. */
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		/* Otherwise, or if nothing matched, report @dev's own tunnel. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Header template must be plain IPv4/IPIP; only DF may be set in frag_off. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed TTL forces DF on the outer header. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Only SIOCADDTUNNEL is allowed to create a missing tunnel. */
		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The requested endpoints already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* A change may not flip the device between
				 * point-to-point and non-point-to-point. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash @dev's tunnel under its new endpoints. */
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				/* A different underlying link needs a re-bind. */
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Hand the resulting parameters back to the caller. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			/* The fallback device itself cannot be deleted this way. */
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
812
813static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
814{
2941a486 815 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1da177e4
LT
816}
817
818static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
819{
820 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
821 return -EINVAL;
822 dev->mtu = new_mtu;
823 return 0;
824}
825
826static void ipip_tunnel_setup(struct net_device *dev)
827{
1da177e4
LT
828 dev->uninit = ipip_tunnel_uninit;
829 dev->hard_start_xmit = ipip_tunnel_xmit;
830 dev->get_stats = ipip_tunnel_get_stats;
831 dev->do_ioctl = ipip_tunnel_ioctl;
832 dev->change_mtu = ipip_tunnel_change_mtu;
833 dev->destructor = free_netdev;
834
835 dev->type = ARPHRD_TUNNEL;
836 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
46f25dff 837 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
1da177e4
LT
838 dev->flags = IFF_NOARP;
839 dev->iflink = 0;
840 dev->addr_len = 4;
841}
842
843static int ipip_tunnel_init(struct net_device *dev)
844{
1da177e4 845 struct ip_tunnel *tunnel;
1da177e4 846
2941a486 847 tunnel = netdev_priv(dev);
1da177e4
LT
848
849 tunnel->dev = dev;
850 strcpy(tunnel->parms.name, dev->name);
851
852 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
853 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
854
5533995b 855 ipip_tunnel_bind_dev(dev);
1da177e4
LT
856
857 return 0;
858}
859
b9855c54 860static int ipip_fb_tunnel_init(struct net_device *dev)
1da177e4 861{
2941a486 862 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
863 struct iphdr *iph = &tunnel->parms.iph;
864
865 tunnel->dev = dev;
866 strcpy(tunnel->parms.name, dev->name);
867
868 iph->version = 4;
869 iph->protocol = IPPROTO_IPIP;
870 iph->ihl = 5;
871
872 dev_hold(dev);
873 tunnels_wc[0] = tunnel;
874 return 0;
875}
876
/*
 * Tunnel handler registered with xfrm4_tunnel_register() in ipip_init():
 * ipip_rcv() receives encapsulated packets, ipip_err() receives the
 * corresponding ICMP errors.
 */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

/* Message printed once by ipip_init(). */
static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
885
10dc4c7b
PE
886static int ipip_init_net(struct net *net)
887{
888 int err;
889 struct ipip_net *ipn;
890
891 err = -ENOMEM;
892 ipn = kmalloc(sizeof(struct ipip_net), GFP_KERNEL);
893 if (ipn == NULL)
894 goto err_alloc;
895
896 err = net_assign_generic(net, ipip_net_id, ipn);
897 if (err < 0)
898 goto err_assign;
899
b9855c54
PE
900 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
901 "tunl0",
902 ipip_tunnel_setup);
903 if (!ipn->fb_tunnel_dev) {
904 err = -ENOMEM;
905 goto err_alloc_dev;
906 }
907
908 ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
909 dev_net_set(ipn->fb_tunnel_dev, net);
910
911 if ((err = register_netdev(ipn->fb_tunnel_dev)))
912 goto err_reg_dev;
913
10dc4c7b
PE
914 return 0;
915
b9855c54
PE
916err_reg_dev:
917 free_netdev(ipn->fb_tunnel_dev);
918err_alloc_dev:
919 /* nothing */
10dc4c7b
PE
920err_assign:
921 kfree(ipn);
922err_alloc:
923 return err;
924}
925
926static void ipip_exit_net(struct net *net)
927{
928 struct ipip_net *ipn;
929
930 ipn = net_generic(net, ipip_net_id);
b9855c54
PE
931 rtnl_lock();
932 unregister_netdevice(ipn->fb_tunnel_dev);
933 rtnl_unlock();
10dc4c7b
PE
934 kfree(ipn);
935}
936
/*
 * Per-network-namespace hooks, registered together with ipip_net_id by
 * register_pernet_gen_device() in ipip_init().
 */
static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
};
941
1da177e4
LT
942static int __init ipip_init(void)
943{
944 int err;
945
946 printk(banner);
947
c0d56408 948 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
1da177e4
LT
949 printk(KERN_INFO "ipip init: can't register tunnel\n");
950 return -EAGAIN;
951 }
952
10dc4c7b
PE
953 err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
954 if (err)
b9855c54
PE
955 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
956
1da177e4 957 return err;
1da177e4
LT
958}
959
db44575f
AK
960static void __exit ipip_destroy_tunnels(void)
961{
962 int prio;
963
964 for (prio = 1; prio < 4; prio++) {
965 int h;
966 for (h = 0; h < HASH_SIZE; h++) {
967 struct ip_tunnel *t;
968 while ((t = tunnels[prio][h]) != NULL)
969 unregister_netdevice(t->dev);
970 }
971 }
972}
973
1da177e4
LT
974static void __exit ipip_fini(void)
975{
c0d56408 976 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1da177e4
LT
977 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
978
db44575f
AK
979 rtnl_lock();
980 ipip_destroy_tunnels();
db44575f 981 rtnl_unlock();
10dc4c7b
PE
982
983 unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
1da177e4
LT
984}
985
/* Module entry/exit points and license declaration. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");