]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
[GRE]: Make tunnels hashes per-net.
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
46f25dff 30#include <linux/if_ether.h>
1da177e4
LT
31
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
59a4c759
PE
42#include <net/net_namespace.h>
43#include <net/netns/generic.h>
1da177e4
LT
44
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
51/*
52 Problems & solutions
53 --------------------
54
55 1. The most important issue is detecting local dead loops.
56 They would cause complete host lockup in transmit, which
57 would be "resolved" by stack overflow or, if queueing is enabled,
58 with infinite looping in net_bh.
59
60 We cannot track such dead loops during route installation,
61 it is infeasible task. The most general solutions would be
62 to keep skb->encapsulation counter (sort of local ttl),
63 and silently drop packet when it expires. It is the best
64 solution, but it supposes maintaining a new variable in ALL
65 skb, even if no tunneling is used.
66
e905a9ed 67 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
68 like dev->tbusy flag, but I preferred new variable, because
69 the semantics is different. One day, when hard_start_xmit
70 will be multithreaded we will have to use skb->encapsulation.
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
82 so that this problem would be solved and traceroute output
83 would be even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
91 One of them is to parse packet trying to detect inner encapsulation
92 made by our node. It is difficult or even impossible, especially,
93 taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
122
123/* Fallback tunnel: no source, no destination, no key, no options */
124
125static int ipgre_fb_tunnel_init(struct net_device *dev);
126
eb8ce741
PE
127#define HASH_SIZE 16
128
59a4c759
PE
129static int ipgre_net_id;
/* Per-network-namespace GRE state, looked up with net_generic(net, ipgre_net_id). */
130struct ipgre_net {
eb8ce741
PE
/*
 * Tunnel hash tables, first index selects the table:
 * 3: (remote,local), 2: (remote,*), 1: (*,local), 0: (*,*).
 * Each table has HASH_SIZE buckets of singly linked tunnels.
 */
131 struct ip_tunnel *tunnels[4][HASH_SIZE];
132
/* Fallback tunnel device, returned by lookup when no configured tunnel matches. */
7daa0004 133 struct net_device *fb_tunnel_dev;
59a4c759
PE
134};
135
1da177e4
LT
136/* Tunnel hash table */
137
138/*
139 4 hash tables:
140
141 3: (remote,local)
142 2: (remote,*)
143 1: (*,local)
144 0: (*,*)
145
146 We require exact key match i.e. if a key is present in packet
147 it will match only tunnel with the same key; if it is not present,
148 it will match only keyless tunnel.
149
150 All keyless packets, if not matched by configured keyless tunnels
151 will match fallback tunnel.
152 */
153
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15).
 * The argument is parenthesized so that an expression argument expands
 * with the intended precedence; note that it is still evaluated twice.
 */
#define HASH(addr) (((__force u32)(addr)^((__force u32)(addr)>>4))&0xF)
1da177e4 155
eb8ce741
PE
156#define tunnels_r_l tunnels[3]
157#define tunnels_r tunnels[2]
158#define tunnels_l tunnels[1]
159#define tunnels_wc tunnels[0]
1da177e4
LT
160
161static DEFINE_RWLOCK(ipgre_lock);
162
163/* Given src, dst and key, find appropriate for input tunnel. */
164
f57e7d5a
PE
165static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166 __be32 remote, __be32 local, __be32 key)
1da177e4
LT
167{
168 unsigned h0 = HASH(remote);
169 unsigned h1 = HASH(key);
170 struct ip_tunnel *t;
7daa0004 171 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 172
eb8ce741 173 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
1da177e4
LT
174 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
175 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
176 return t;
177 }
178 }
eb8ce741 179 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
1da177e4
LT
180 if (remote == t->parms.iph.daddr) {
181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182 return t;
183 }
184 }
eb8ce741 185 for (t = ign->tunnels_l[h1]; t; t = t->next) {
1da177e4 186 if (local == t->parms.iph.saddr ||
f97c1e0c
JP
187 (local == t->parms.iph.daddr &&
188 ipv4_is_multicast(local))) {
1da177e4
LT
189 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
190 return t;
191 }
192 }
eb8ce741 193 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
1da177e4
LT
194 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
195 return t;
196 }
197
7daa0004
PE
198 if (ign->fb_tunnel_dev->flags&IFF_UP)
199 return netdev_priv(ign->fb_tunnel_dev);
1da177e4
LT
200 return NULL;
201}
202
f57e7d5a
PE
203static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204 struct ip_tunnel_parm *parms)
1da177e4 205{
5056a1ef
YH
206 __be32 remote = parms->iph.daddr;
207 __be32 local = parms->iph.saddr;
208 __be32 key = parms->i_key;
1da177e4
LT
209 unsigned h = HASH(key);
210 int prio = 0;
211
212 if (local)
213 prio |= 1;
f97c1e0c 214 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
215 prio |= 2;
216 h ^= HASH(remote);
217 }
218
eb8ce741 219 return &ign->tunnels[prio][h];
1da177e4
LT
220}
221
f57e7d5a
PE
222static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
223 struct ip_tunnel *t)
5056a1ef 224{
f57e7d5a 225 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
226}
227
f57e7d5a 228static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 229{
f57e7d5a 230 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
231
232 t->next = *tp;
233 write_lock_bh(&ipgre_lock);
234 *tp = t;
235 write_unlock_bh(&ipgre_lock);
236}
237
f57e7d5a 238static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
239{
240 struct ip_tunnel **tp;
241
f57e7d5a 242 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
243 if (t == *tp) {
244 write_lock_bh(&ipgre_lock);
245 *tp = t->next;
246 write_unlock_bh(&ipgre_lock);
247 break;
248 }
249 }
250}
251
f57e7d5a
PE
252static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253 struct ip_tunnel_parm *parms, int create)
1da177e4 254{
d5a0a1e3
AV
255 __be32 remote = parms->iph.daddr;
256 __be32 local = parms->iph.saddr;
257 __be32 key = parms->i_key;
1da177e4
LT
258 struct ip_tunnel *t, **tp, *nt;
259 struct net_device *dev;
1da177e4 260 char name[IFNAMSIZ];
f57e7d5a 261 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 262
f57e7d5a 263 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
264 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
265 if (key == t->parms.i_key)
266 return t;
267 }
268 }
269 if (!create)
270 return NULL;
271
272 if (parms->name[0])
273 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
274 else
275 sprintf(name, "gre%%d");
1da177e4
LT
276
277 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
278 if (!dev)
279 return NULL;
280
b37d428b
PE
281 if (strchr(name, '%')) {
282 if (dev_alloc_name(dev, name) < 0)
283 goto failed_free;
284 }
285
1da177e4 286 dev->init = ipgre_tunnel_init;
2941a486 287 nt = netdev_priv(dev);
1da177e4
LT
288 nt->parms = *parms;
289
b37d428b
PE
290 if (register_netdevice(dev) < 0)
291 goto failed_free;
1da177e4 292
1da177e4 293 dev_hold(dev);
f57e7d5a 294 ipgre_tunnel_link(ign, nt);
1da177e4
LT
295 return nt;
296
b37d428b
PE
297failed_free:
298 free_netdev(dev);
1da177e4
LT
299 return NULL;
300}
301
302static void ipgre_tunnel_uninit(struct net_device *dev)
303{
f57e7d5a
PE
304 struct net *net = dev_net(dev);
305 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
306
307 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
308 dev_put(dev);
309}
310
311
312static void ipgre_err(struct sk_buff *skb, u32 info)
313{
314#ifndef I_WISH_WORLD_WERE_PERFECT
315
316/* It is not :-( All the routers (except for Linux) return only
317 8 bytes of packet payload. It means, that precise relaying of
318 ICMP in the real Internet is absolutely infeasible.
319
320 Moreover, Cisco "wise men" put GRE key to the third word
321 in GRE header. It makes impossible maintaining even soft state for keyed
322 GRE tunnels with enabled checksum. Tell them "thank you".
323
324 Well, I wonder, rfc1812 was written by Cisco employee,
325 what the hell these idiots break standrads established
326 by themself???
327 */
328
329 struct iphdr *iph = (struct iphdr*)skb->data;
d5a0a1e3 330 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 331 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
332 const int type = icmp_hdr(skb)->type;
333 const int code = icmp_hdr(skb)->code;
1da177e4 334 struct ip_tunnel *t;
d5a0a1e3 335 __be16 flags;
1da177e4
LT
336
337 flags = p[0];
338 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
339 if (flags&(GRE_VERSION|GRE_ROUTING))
340 return;
341 if (flags&GRE_KEY) {
342 grehlen += 4;
343 if (flags&GRE_CSUM)
344 grehlen += 4;
345 }
346 }
347
348 /* If only 8 bytes returned, keyed message will be dropped here */
349 if (skb_headlen(skb) < grehlen)
350 return;
351
352 switch (type) {
353 default:
354 case ICMP_PARAMETERPROB:
355 return;
356
357 case ICMP_DEST_UNREACH:
358 switch (code) {
359 case ICMP_SR_FAILED:
360 case ICMP_PORT_UNREACH:
361 /* Impossible event. */
362 return;
363 case ICMP_FRAG_NEEDED:
364 /* Soft state for pmtu is maintained by IP core. */
365 return;
366 default:
367 /* All others are translated to HOST_UNREACH.
368 rfc2003 contains "deep thoughts" about NET_UNREACH,
369 I believe they are just ether pollution. --ANK
370 */
371 break;
372 }
373 break;
374 case ICMP_TIME_EXCEEDED:
375 if (code != ICMP_EXC_TTL)
376 return;
377 break;
378 }
379
380 read_lock(&ipgre_lock);
3b4667f3 381 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
f57e7d5a
PE
382 (flags&GRE_KEY) ?
383 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
f97c1e0c
JP
384 if (t == NULL || t->parms.iph.daddr == 0 ||
385 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
386 goto out;
387
388 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
389 goto out;
390
391 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
392 t->err_count++;
393 else
394 t->err_count = 1;
395 t->err_time = jiffies;
396out:
397 read_unlock(&ipgre_lock);
398 return;
399#else
400 struct iphdr *iph = (struct iphdr*)dp;
401 struct iphdr *eiph;
d5a0a1e3 402 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
88c7664f
ACM
403 const int type = icmp_hdr(skb)->type;
404 const int code = icmp_hdr(skb)->code;
1da177e4
LT
405 int rel_type = 0;
406 int rel_code = 0;
c55e2f49
AV
407 __be32 rel_info = 0;
408 __u32 n = 0;
d5a0a1e3 409 __be16 flags;
1da177e4
LT
410 int grehlen = (iph->ihl<<2) + 4;
411 struct sk_buff *skb2;
412 struct flowi fl;
413 struct rtable *rt;
414
415 if (p[1] != htons(ETH_P_IP))
416 return;
417
418 flags = p[0];
419 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
420 if (flags&(GRE_VERSION|GRE_ROUTING))
421 return;
422 if (flags&GRE_CSUM)
423 grehlen += 4;
424 if (flags&GRE_KEY)
425 grehlen += 4;
426 if (flags&GRE_SEQ)
427 grehlen += 4;
428 }
429 if (len < grehlen + sizeof(struct iphdr))
430 return;
431 eiph = (struct iphdr*)(dp + grehlen);
432
433 switch (type) {
434 default:
435 return;
436 case ICMP_PARAMETERPROB:
88c7664f 437 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
c55e2f49 438 if (n < (iph->ihl<<2))
1da177e4
LT
439 return;
440
441 /* So... This guy found something strange INSIDE encapsulated
442 packet. Well, he is fool, but what can we do ?
443 */
444 rel_type = ICMP_PARAMETERPROB;
c55e2f49
AV
445 n -= grehlen;
446 rel_info = htonl(n << 24);
1da177e4
LT
447 break;
448
449 case ICMP_DEST_UNREACH:
450 switch (code) {
451 case ICMP_SR_FAILED:
452 case ICMP_PORT_UNREACH:
453 /* Impossible event. */
454 return;
455 case ICMP_FRAG_NEEDED:
456 /* And it is the only really necessary thing :-) */
88c7664f 457 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
c55e2f49 458 if (n < grehlen+68)
1da177e4 459 return;
c55e2f49 460 n -= grehlen;
1da177e4 461 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
c55e2f49 462 if (n > ntohs(eiph->tot_len))
1da177e4 463 return;
c55e2f49 464 rel_info = htonl(n);
1da177e4
LT
465 break;
466 default:
467 /* All others are translated to HOST_UNREACH.
468 rfc2003 contains "deep thoughts" about NET_UNREACH,
469 I believe, it is just ether pollution. --ANK
470 */
471 rel_type = ICMP_DEST_UNREACH;
472 rel_code = ICMP_HOST_UNREACH;
473 break;
474 }
475 break;
476 case ICMP_TIME_EXCEEDED:
477 if (code != ICMP_EXC_TTL)
478 return;
479 break;
480 }
481
482 /* Prepare fake skb to feed it to icmp_send */
483 skb2 = skb_clone(skb, GFP_ATOMIC);
484 if (skb2 == NULL)
485 return;
486 dst_release(skb2->dst);
487 skb2->dst = NULL;
488 skb_pull(skb2, skb->data - (u8*)eiph);
c1d2bbe1 489 skb_reset_network_header(skb2);
1da177e4
LT
490
491 /* Try to guess incoming interface */
492 memset(&fl, 0, sizeof(fl));
493 fl.fl4_dst = eiph->saddr;
494 fl.fl4_tos = RT_TOS(eiph->tos);
495 fl.proto = IPPROTO_GRE;
f206351a 496 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
497 kfree_skb(skb2);
498 return;
499 }
500 skb2->dev = rt->u.dst.dev;
501
502 /* route "incoming" packet */
503 if (rt->rt_flags&RTCF_LOCAL) {
504 ip_rt_put(rt);
505 rt = NULL;
506 fl.fl4_dst = eiph->daddr;
507 fl.fl4_src = eiph->saddr;
508 fl.fl4_tos = eiph->tos;
f206351a 509 if (ip_route_output_key(&init_net, &rt, &fl) ||
1da177e4
LT
510 rt->u.dst.dev->type != ARPHRD_IPGRE) {
511 ip_rt_put(rt);
512 kfree_skb(skb2);
513 return;
514 }
515 } else {
516 ip_rt_put(rt);
517 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
518 skb2->dst->dev->type != ARPHRD_IPGRE) {
519 kfree_skb(skb2);
520 return;
521 }
522 }
523
524 /* change mtu on this route */
525 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
c55e2f49 526 if (n > dst_mtu(skb2->dst)) {
1da177e4
LT
527 kfree_skb(skb2);
528 return;
529 }
c55e2f49 530 skb2->dst->ops->update_pmtu(skb2->dst, n);
1da177e4 531 } else if (type == ICMP_TIME_EXCEEDED) {
2941a486 532 struct ip_tunnel *t = netdev_priv(skb2->dev);
1da177e4
LT
533 if (t->parms.iph.ttl) {
534 rel_type = ICMP_DEST_UNREACH;
535 rel_code = ICMP_HOST_UNREACH;
536 }
537 }
538
539 icmp_send(skb2, rel_type, rel_code, rel_info);
540 kfree_skb(skb2);
541#endif
542}
543
544static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
545{
546 if (INET_ECN_is_ce(iph->tos)) {
547 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 548 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 549 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 550 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
551 }
552 }
553}
554
555static inline u8
556ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
557{
558 u8 inner = 0;
559 if (skb->protocol == htons(ETH_P_IP))
560 inner = old_iph->tos;
561 else if (skb->protocol == htons(ETH_P_IPV6))
562 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
563 return INET_ECN_encapsulate(tos, inner);
564}
565
566static int ipgre_rcv(struct sk_buff *skb)
567{
568 struct iphdr *iph;
569 u8 *h;
d5a0a1e3 570 __be16 flags;
d3bc23e7 571 __sum16 csum = 0;
d5a0a1e3 572 __be32 key = 0;
1da177e4
LT
573 u32 seqno = 0;
574 struct ip_tunnel *tunnel;
575 int offset = 4;
576
577 if (!pskb_may_pull(skb, 16))
578 goto drop_nolock;
579
eddc9ec5 580 iph = ip_hdr(skb);
1da177e4 581 h = skb->data;
d5a0a1e3 582 flags = *(__be16*)h;
1da177e4
LT
583
584 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
585 /* - Version must be 0.
586 - We do not support routing headers.
587 */
588 if (flags&(GRE_VERSION|GRE_ROUTING))
589 goto drop_nolock;
590
591 if (flags&GRE_CSUM) {
fb286bb2 592 switch (skb->ip_summed) {
84fa7933 593 case CHECKSUM_COMPLETE:
d3bc23e7 594 csum = csum_fold(skb->csum);
fb286bb2
HX
595 if (!csum)
596 break;
597 /* fall through */
598 case CHECKSUM_NONE:
599 skb->csum = 0;
600 csum = __skb_checksum_complete(skb);
84fa7933 601 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
602 }
603 offset += 4;
604 }
605 if (flags&GRE_KEY) {
d5a0a1e3 606 key = *(__be32*)(h + offset);
1da177e4
LT
607 offset += 4;
608 }
609 if (flags&GRE_SEQ) {
d5a0a1e3 610 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
611 offset += 4;
612 }
613 }
614
615 read_lock(&ipgre_lock);
3b4667f3 616 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
f57e7d5a 617 iph->saddr, iph->daddr, key)) != NULL) {
1da177e4
LT
618 secpath_reset(skb);
619
d5a0a1e3 620 skb->protocol = *(__be16*)(h + 2);
1da177e4
LT
621 /* WCCP version 1 and 2 protocol decoding.
622 * - Change protocol to IP
623 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
624 */
625 if (flags == 0 &&
496c98df
YH
626 skb->protocol == htons(ETH_P_WCCP)) {
627 skb->protocol = htons(ETH_P_IP);
e905a9ed 628 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
629 offset += 4;
630 }
631
1d069167 632 skb->mac_header = skb->network_header;
4209fb60
ACM
633 __pskb_pull(skb, offset);
634 skb_reset_network_header(skb);
9c70220b 635 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
636 skb->pkt_type = PACKET_HOST;
637#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 638 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 639 /* Looped back packet, drop it! */
ee6b9673 640 if (skb->rtable->fl.iif == 0)
1da177e4
LT
641 goto drop;
642 tunnel->stat.multicast++;
643 skb->pkt_type = PACKET_BROADCAST;
644 }
645#endif
646
647 if (((flags&GRE_CSUM) && csum) ||
648 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
649 tunnel->stat.rx_crc_errors++;
650 tunnel->stat.rx_errors++;
651 goto drop;
652 }
653 if (tunnel->parms.i_flags&GRE_SEQ) {
654 if (!(flags&GRE_SEQ) ||
655 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
656 tunnel->stat.rx_fifo_errors++;
657 tunnel->stat.rx_errors++;
658 goto drop;
659 }
660 tunnel->i_seqno = seqno + 1;
661 }
662 tunnel->stat.rx_packets++;
663 tunnel->stat.rx_bytes += skb->len;
664 skb->dev = tunnel->dev;
665 dst_release(skb->dst);
666 skb->dst = NULL;
667 nf_reset(skb);
668 ipgre_ecn_decapsulate(iph, skb);
669 netif_rx(skb);
670 read_unlock(&ipgre_lock);
671 return(0);
672 }
45af08be 673 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
674
675drop:
676 read_unlock(&ipgre_lock);
677drop_nolock:
678 kfree_skb(skb);
679 return(0);
680}
681
682static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
683{
2941a486 684 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 685 struct net_device_stats *stats = &tunnel->stat;
eddc9ec5 686 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
687 struct iphdr *tiph;
688 u8 tos;
d5a0a1e3 689 __be16 df;
1da177e4
LT
690 struct rtable *rt; /* Route to the other host */
691 struct net_device *tdev; /* Device to other host */
692 struct iphdr *iph; /* Our new IP header */
c2636b4d 693 unsigned int max_headroom; /* The extra header space needed */
1da177e4 694 int gre_hlen;
d5a0a1e3 695 __be32 dst;
1da177e4
LT
696 int mtu;
697
698 if (tunnel->recursion++) {
699 tunnel->stat.collisions++;
700 goto tx_error;
701 }
702
3b04ddde 703 if (dev->header_ops) {
1da177e4
LT
704 gre_hlen = 0;
705 tiph = (struct iphdr*)skb->data;
706 } else {
707 gre_hlen = tunnel->hlen;
708 tiph = &tunnel->parms.iph;
709 }
710
711 if ((dst = tiph->daddr) == 0) {
712 /* NBMA tunnel */
713
714 if (skb->dst == NULL) {
715 tunnel->stat.tx_fifo_errors++;
716 goto tx_error;
717 }
718
719 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 720 rt = skb->rtable;
1da177e4
LT
721 if ((dst = rt->rt_gateway) == 0)
722 goto tx_error_icmp;
723 }
724#ifdef CONFIG_IPV6
725 else if (skb->protocol == htons(ETH_P_IPV6)) {
726 struct in6_addr *addr6;
727 int addr_type;
728 struct neighbour *neigh = skb->dst->neighbour;
729
730 if (neigh == NULL)
731 goto tx_error;
732
733 addr6 = (struct in6_addr*)&neigh->primary_key;
734 addr_type = ipv6_addr_type(addr6);
735
736 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 737 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
738 addr_type = ipv6_addr_type(addr6);
739 }
740
741 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
742 goto tx_error_icmp;
743
744 dst = addr6->s6_addr32[3];
745 }
746#endif
747 else
748 goto tx_error;
749 }
750
751 tos = tiph->tos;
752 if (tos&1) {
753 if (skb->protocol == htons(ETH_P_IP))
754 tos = old_iph->tos;
755 tos &= ~1;
756 }
757
758 {
759 struct flowi fl = { .oif = tunnel->parms.link,
760 .nl_u = { .ip4_u =
761 { .daddr = dst,
762 .saddr = tiph->saddr,
763 .tos = RT_TOS(tos) } },
764 .proto = IPPROTO_GRE };
f206351a 765 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
766 tunnel->stat.tx_carrier_errors++;
767 goto tx_error;
768 }
769 }
770 tdev = rt->u.dst.dev;
771
772 if (tdev == dev) {
773 ip_rt_put(rt);
774 tunnel->stat.collisions++;
775 goto tx_error;
776 }
777
778 df = tiph->frag_off;
779 if (df)
780 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
781 else
782 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
783
784 if (skb->dst)
785 skb->dst->ops->update_pmtu(skb->dst, mtu);
786
787 if (skb->protocol == htons(ETH_P_IP)) {
788 df |= (old_iph->frag_off&htons(IP_DF));
789
790 if ((old_iph->frag_off&htons(IP_DF)) &&
791 mtu < ntohs(old_iph->tot_len)) {
792 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
793 ip_rt_put(rt);
794 goto tx_error;
795 }
796 }
797#ifdef CONFIG_IPV6
798 else if (skb->protocol == htons(ETH_P_IPV6)) {
799 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
800
801 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
802 if ((tunnel->parms.iph.daddr &&
803 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
804 rt6->rt6i_dst.plen == 128) {
805 rt6->rt6i_flags |= RTF_MODIFIED;
806 skb->dst->metrics[RTAX_MTU-1] = mtu;
807 }
808 }
809
810 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
811 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
812 ip_rt_put(rt);
813 goto tx_error;
814 }
815 }
816#endif
817
818 if (tunnel->err_count > 0) {
819 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
820 tunnel->err_count--;
821
822 dst_link_failure(skb);
823 } else
824 tunnel->err_count = 0;
825 }
826
827 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
828
cfbba49d
PM
829 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
830 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
831 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
832 if (!new_skb) {
833 ip_rt_put(rt);
e905a9ed 834 stats->tx_dropped++;
1da177e4
LT
835 dev_kfree_skb(skb);
836 tunnel->recursion--;
837 return 0;
838 }
839 if (skb->sk)
840 skb_set_owner_w(new_skb, skb->sk);
841 dev_kfree_skb(skb);
842 skb = new_skb;
eddc9ec5 843 old_iph = ip_hdr(skb);
1da177e4
LT
844 }
845
b0e380b1 846 skb->transport_header = skb->network_header;
e2d1bca7
ACM
847 skb_push(skb, gre_hlen);
848 skb_reset_network_header(skb);
1da177e4 849 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
850 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
851 IPSKB_REROUTED);
1da177e4
LT
852 dst_release(skb->dst);
853 skb->dst = &rt->u.dst;
854
855 /*
856 * Push down and install the IPIP header.
857 */
858
eddc9ec5 859 iph = ip_hdr(skb);
1da177e4
LT
860 iph->version = 4;
861 iph->ihl = sizeof(struct iphdr) >> 2;
862 iph->frag_off = df;
863 iph->protocol = IPPROTO_GRE;
864 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
865 iph->daddr = rt->rt_dst;
866 iph->saddr = rt->rt_src;
867
868 if ((iph->ttl = tiph->ttl) == 0) {
869 if (skb->protocol == htons(ETH_P_IP))
870 iph->ttl = old_iph->ttl;
871#ifdef CONFIG_IPV6
872 else if (skb->protocol == htons(ETH_P_IPV6))
873 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
874#endif
875 else
876 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
877 }
878
d5a0a1e3
AV
879 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
880 ((__be16*)(iph+1))[1] = skb->protocol;
1da177e4
LT
881
882 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 883 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
884
885 if (tunnel->parms.o_flags&GRE_SEQ) {
886 ++tunnel->o_seqno;
887 *ptr = htonl(tunnel->o_seqno);
888 ptr--;
889 }
890 if (tunnel->parms.o_flags&GRE_KEY) {
891 *ptr = tunnel->parms.o_key;
892 ptr--;
893 }
894 if (tunnel->parms.o_flags&GRE_CSUM) {
895 *ptr = 0;
5f92a738 896 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
897 }
898 }
899
900 nf_reset(skb);
901
902 IPTUNNEL_XMIT();
903 tunnel->recursion--;
904 return 0;
905
906tx_error_icmp:
907 dst_link_failure(skb);
908
909tx_error:
910 stats->tx_errors++;
911 dev_kfree_skb(skb);
912 tunnel->recursion--;
913 return 0;
914}
915
ee34c1eb
MS
916static void ipgre_tunnel_bind_dev(struct net_device *dev)
917{
918 struct net_device *tdev = NULL;
919 struct ip_tunnel *tunnel;
920 struct iphdr *iph;
921 int hlen = LL_MAX_HEADER;
922 int mtu = ETH_DATA_LEN;
923 int addend = sizeof(struct iphdr) + 4;
924
925 tunnel = netdev_priv(dev);
926 iph = &tunnel->parms.iph;
927
928 /* Guess output device to choose reasonable mtu and hard_header_len */
929
930 if (iph->daddr) {
931 struct flowi fl = { .oif = tunnel->parms.link,
932 .nl_u = { .ip4_u =
933 { .daddr = iph->daddr,
934 .saddr = iph->saddr,
935 .tos = RT_TOS(iph->tos) } },
936 .proto = IPPROTO_GRE };
937 struct rtable *rt;
f206351a 938 if (!ip_route_output_key(&init_net, &rt, &fl)) {
ee34c1eb
MS
939 tdev = rt->u.dst.dev;
940 ip_rt_put(rt);
941 }
942 dev->flags |= IFF_POINTOPOINT;
943 }
944
945 if (!tdev && tunnel->parms.link)
946 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
947
948 if (tdev) {
949 hlen = tdev->hard_header_len;
950 mtu = tdev->mtu;
951 }
952 dev->iflink = tunnel->parms.link;
953
954 /* Precalculate GRE options length */
955 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
956 if (tunnel->parms.o_flags&GRE_CSUM)
957 addend += 4;
958 if (tunnel->parms.o_flags&GRE_KEY)
959 addend += 4;
960 if (tunnel->parms.o_flags&GRE_SEQ)
961 addend += 4;
962 }
963 dev->hard_header_len = hlen + addend;
964 dev->mtu = mtu - addend;
965 tunnel->hlen = addend;
966
967}
968
1da177e4
LT
969static int
970ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
971{
972 int err = 0;
973 struct ip_tunnel_parm p;
974 struct ip_tunnel *t;
f57e7d5a
PE
975 struct net *net = dev_net(dev);
976 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
977
978 switch (cmd) {
979 case SIOCGETTUNNEL:
980 t = NULL;
7daa0004 981 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
982 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
983 err = -EFAULT;
984 break;
985 }
f57e7d5a 986 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
987 }
988 if (t == NULL)
2941a486 989 t = netdev_priv(dev);
1da177e4
LT
990 memcpy(&p, &t->parms, sizeof(p));
991 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
992 err = -EFAULT;
993 break;
994
995 case SIOCADDTUNNEL:
996 case SIOCCHGTUNNEL:
997 err = -EPERM;
998 if (!capable(CAP_NET_ADMIN))
999 goto done;
1000
1001 err = -EFAULT;
1002 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1003 goto done;
1004
1005 err = -EINVAL;
1006 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1007 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1008 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1009 goto done;
1010 if (p.iph.ttl)
1011 p.iph.frag_off |= htons(IP_DF);
1012
1013 if (!(p.i_flags&GRE_KEY))
1014 p.i_key = 0;
1015 if (!(p.o_flags&GRE_KEY))
1016 p.o_key = 0;
1017
f57e7d5a 1018 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1019
7daa0004 1020 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1021 if (t != NULL) {
1022 if (t->dev != dev) {
1023 err = -EEXIST;
1024 break;
1025 }
1026 } else {
1027 unsigned nflags=0;
1028
2941a486 1029 t = netdev_priv(dev);
1da177e4 1030
f97c1e0c 1031 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1032 nflags = IFF_BROADCAST;
1033 else if (p.iph.daddr)
1034 nflags = IFF_POINTOPOINT;
1035
1036 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1037 err = -EINVAL;
1038 break;
1039 }
f57e7d5a 1040 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1041 t->parms.iph.saddr = p.iph.saddr;
1042 t->parms.iph.daddr = p.iph.daddr;
1043 t->parms.i_key = p.i_key;
1044 t->parms.o_key = p.o_key;
1045 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1046 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1047 ipgre_tunnel_link(ign, t);
1da177e4
LT
1048 netdev_state_change(dev);
1049 }
1050 }
1051
1052 if (t) {
1053 err = 0;
1054 if (cmd == SIOCCHGTUNNEL) {
1055 t->parms.iph.ttl = p.iph.ttl;
1056 t->parms.iph.tos = p.iph.tos;
1057 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1058 if (t->parms.link != p.link) {
1059 t->parms.link = p.link;
1060 ipgre_tunnel_bind_dev(dev);
1061 netdev_state_change(dev);
1062 }
1da177e4
LT
1063 }
1064 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1065 err = -EFAULT;
1066 } else
1067 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1068 break;
1069
1070 case SIOCDELTUNNEL:
1071 err = -EPERM;
1072 if (!capable(CAP_NET_ADMIN))
1073 goto done;
1074
7daa0004 1075 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1076 err = -EFAULT;
1077 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1078 goto done;
1079 err = -ENOENT;
f57e7d5a 1080 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1081 goto done;
1082 err = -EPERM;
7daa0004 1083 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1084 goto done;
1085 dev = t->dev;
1086 }
22f8cde5
SH
1087 unregister_netdevice(dev);
1088 err = 0;
1da177e4
LT
1089 break;
1090
1091 default:
1092 err = -EINVAL;
1093 }
1094
1095done:
1096 return err;
1097}
1098
1099static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1100{
2941a486 1101 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1da177e4
LT
1102}
1103
1104static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1105{
2941a486 1106 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
1107 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1108 return -EINVAL;
1109 dev->mtu = new_mtu;
1110 return 0;
1111}
1112
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1141
3b04ddde
SH
1142static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1143 unsigned short type,
1144 const void *daddr, const void *saddr, unsigned len)
1da177e4 1145{
2941a486 1146 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1147 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1148 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1149
1150 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1151 p[0] = t->parms.o_flags;
1152 p[1] = htons(type);
1153
1154 /*
e905a9ed 1155 * Set the source hardware address.
1da177e4 1156 */
e905a9ed 1157
1da177e4
LT
1158 if (saddr)
1159 memcpy(&iph->saddr, saddr, 4);
1160
1161 if (daddr) {
1162 memcpy(&iph->daddr, daddr, 4);
1163 return t->hlen;
1164 }
f97c1e0c 1165 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1166 return t->hlen;
e905a9ed 1167
1da177e4
LT
1168 return -t->hlen;
1169}
1170
6a5f44d7
TT
1171static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1172{
1173 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1174 memcpy(haddr, &iph->saddr, 4);
1175 return 4;
1176}
1177
3b04ddde
SH
1178static const struct header_ops ipgre_header_ops = {
1179 .create = ipgre_header,
6a5f44d7 1180 .parse = ipgre_header_parse,
3b04ddde
SH
1181};
1182
6a5f44d7 1183#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1184static int ipgre_open(struct net_device *dev)
1185{
2941a486 1186 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1187
f97c1e0c 1188 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1189 struct flowi fl = { .oif = t->parms.link,
1190 .nl_u = { .ip4_u =
1191 { .daddr = t->parms.iph.daddr,
1192 .saddr = t->parms.iph.saddr,
1193 .tos = RT_TOS(t->parms.iph.tos) } },
1194 .proto = IPPROTO_GRE };
1195 struct rtable *rt;
f206351a 1196 if (ip_route_output_key(&init_net, &rt, &fl))
1da177e4
LT
1197 return -EADDRNOTAVAIL;
1198 dev = rt->u.dst.dev;
1199 ip_rt_put(rt);
e5ed6399 1200 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1201 return -EADDRNOTAVAIL;
1202 t->mlink = dev->ifindex;
e5ed6399 1203 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1204 }
1205 return 0;
1206}
1207
1208static int ipgre_close(struct net_device *dev)
1209{
2941a486 1210 struct ip_tunnel *t = netdev_priv(dev);
f97c1e0c 1211 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1212 struct in_device *in_dev;
c346dca1 1213 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1214 if (in_dev) {
1215 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1216 in_dev_put(in_dev);
1217 }
1218 }
1219 return 0;
1220}
1221
1222#endif
1223
1224static void ipgre_tunnel_setup(struct net_device *dev)
1225{
1da177e4
LT
1226 dev->uninit = ipgre_tunnel_uninit;
1227 dev->destructor = free_netdev;
1228 dev->hard_start_xmit = ipgre_tunnel_xmit;
1229 dev->get_stats = ipgre_tunnel_get_stats;
1230 dev->do_ioctl = ipgre_tunnel_ioctl;
1231 dev->change_mtu = ipgre_tunnel_change_mtu;
1232
1233 dev->type = ARPHRD_IPGRE;
1234 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1235 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1236 dev->flags = IFF_NOARP;
1237 dev->iflink = 0;
1238 dev->addr_len = 4;
1239}
1240
1241static int ipgre_tunnel_init(struct net_device *dev)
1242{
1da177e4
LT
1243 struct ip_tunnel *tunnel;
1244 struct iphdr *iph;
1da177e4 1245
2941a486 1246 tunnel = netdev_priv(dev);
1da177e4
LT
1247 iph = &tunnel->parms.iph;
1248
1249 tunnel->dev = dev;
1250 strcpy(tunnel->parms.name, dev->name);
1251
1252 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1253 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1254
ee34c1eb 1255 ipgre_tunnel_bind_dev(dev);
1da177e4
LT
1256
1257 if (iph->daddr) {
1da177e4 1258#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1259 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1260 if (!iph->saddr)
1261 return -EINVAL;
1262 dev->flags = IFF_BROADCAST;
3b04ddde 1263 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1264 dev->open = ipgre_open;
1265 dev->stop = ipgre_close;
1266 }
1267#endif
ee34c1eb 1268 } else
6a5f44d7 1269 dev->header_ops = &ipgre_header_ops;
1da177e4 1270
1da177e4
LT
1271 return 0;
1272}
1273
7daa0004 1274static int ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1275{
2941a486 1276 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1277 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1278 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1279
1280 tunnel->dev = dev;
1281 strcpy(tunnel->parms.name, dev->name);
1282
1283 iph->version = 4;
1284 iph->protocol = IPPROTO_GRE;
1285 iph->ihl = 5;
1286 tunnel->hlen = sizeof(struct iphdr) + 4;
1287
1288 dev_hold(dev);
eb8ce741 1289 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1290 return 0;
1291}
1292
1293
1294static struct net_protocol ipgre_protocol = {
1295 .handler = ipgre_rcv,
1296 .err_handler = ipgre_err,
1297};
1298
eb8ce741
PE
1299static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1300{
1301 int prio;
1302
1303 for (prio = 0; prio < 4; prio++) {
1304 int h;
1305 for (h = 0; h < HASH_SIZE; h++) {
1306 struct ip_tunnel *t;
1307 while ((t = ign->tunnels[prio][h]) != NULL)
1308 unregister_netdevice(t->dev);
1309 }
1310 }
1311}
1312
59a4c759
PE
1313static int ipgre_init_net(struct net *net)
1314{
1315 int err;
1316 struct ipgre_net *ign;
1317
1318 err = -ENOMEM;
eb8ce741 1319 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1320 if (ign == NULL)
1321 goto err_alloc;
1322
1323 err = net_assign_generic(net, ipgre_net_id, ign);
1324 if (err < 0)
1325 goto err_assign;
1326
7daa0004
PE
1327 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1328 ipgre_tunnel_setup);
1329 if (!ign->fb_tunnel_dev) {
1330 err = -ENOMEM;
1331 goto err_alloc_dev;
1332 }
1333
1334 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1335 dev_net_set(ign->fb_tunnel_dev, net);
1336
1337 if ((err = register_netdev(ign->fb_tunnel_dev)))
1338 goto err_reg_dev;
1339
59a4c759
PE
1340 return 0;
1341
7daa0004
PE
1342err_reg_dev:
1343 free_netdev(ign->fb_tunnel_dev);
1344err_alloc_dev:
1345 /* nothing */
59a4c759
PE
1346err_assign:
1347 kfree(ign);
1348err_alloc:
1349 return err;
1350}
1351
1352static void ipgre_exit_net(struct net *net)
1353{
1354 struct ipgre_net *ign;
1355
1356 ign = net_generic(net, ipgre_net_id);
7daa0004 1357 rtnl_lock();
eb8ce741 1358 ipgre_destroy_tunnels(ign);
7daa0004 1359 rtnl_unlock();
59a4c759
PE
1360 kfree(ign);
1361}
1362
1363static struct pernet_operations ipgre_net_ops = {
1364 .init = ipgre_init_net,
1365 .exit = ipgre_exit_net,
1366};
1da177e4
LT
1367
1368/*
1369 * And now the modules code and kernel interface.
1370 */
1371
1372static int __init ipgre_init(void)
1373{
1374 int err;
1375
1376 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1377
1378 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1379 printk(KERN_INFO "ipgre init: can't add protocol\n");
1380 return -EAGAIN;
1381 }
1382
59a4c759
PE
1383 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1384 if (err < 0)
7daa0004
PE
1385 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1386
1da177e4 1387 return err;
1da177e4
LT
1388}
1389
db44575f 1390static void __exit ipgre_fini(void)
1da177e4
LT
1391{
1392 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1393 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1394
59a4c759 1395 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1396}
1397
1398module_init(ipgre_init);
1399module_exit(ipgre_fini);
1400MODULE_LICENSE("GPL");