]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
[GRE]: Make the fallback tunnel device per-net.
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
46f25dff 30#include <linux/if_ether.h>
1da177e4
LT
31
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
59a4c759
PE
42#include <net/net_namespace.h>
43#include <net/netns/generic.h>
1da177e4
LT
44
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
51/*
52 Problems & solutions
53 --------------------
54
55 1. The most important issue is detecting local dead loops.
56 They would cause complete host lockup in transmit, which
57 would be "resolved" by stack overflow or, if queueing is enabled,
58 with infinite looping in net_bh.
59
60 We cannot track such dead loops during route installation,
61 it is infeasible task. The most general solutions would be
62 to keep skb->encapsulation counter (sort of local ttl),
63 and silently drop packet when it expires. It is the best
64 solution, but it supposes maintaining a new variable in ALL
65 skb, even if no tunneling is used.
66
e905a9ed 67 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
68 like dev->tbusy flag, but I preferred new variable, because
69 the semantics is different. One day, when hard_start_xmit
70 will be multithreaded we will have to use skb->encapsulation.
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
82 so that this problem would be solved and traceroute output
83 would even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
91 One of them is to parse packet trying to detect inner encapsulation
92 made by our node. It is difficult or even impossible, especially,
93 taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
/* Forward declarations of device callbacks used before their definitions. */
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_init(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */
static int ipgre_fb_tunnel_init(struct net_device *dev);
126
59a4c759
PE
/* Id passed to net_generic() to fetch the per-namespace GRE state. */
static int ipgre_net_id;

/* GRE state kept per network namespace. */
struct ipgre_net {
	/* Fallback tunnel device of this namespace. */
	struct net_device *fb_tunnel_dev;
};
131
1da177e4
LT
132/* Tunnel hash table */
133
134/*
135 4 hash tables:
136
137 3: (remote,local)
138 2: (remote,*)
139 1: (*,local)
140 0: (*,*)
141
142 We require exact key match i.e. if a key is present in packet
143 it will match only tunnel with the same key; if it is not present,
144 it will match only keyless tunnel.
145
146 All keyless packets, if not matched by configured keyless tunnels,
147 will match fallback tunnel.
148 */
149
/* Number of buckets in each tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that an expression argument
 * (e.g. HASH(a | b)) is hashed as a whole instead of being torn apart
 * by operator precedence.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
1da177e4
LT
152
153static struct ip_tunnel *tunnels[4][HASH_SIZE];
154
155#define tunnels_r_l (tunnels[3])
156#define tunnels_r (tunnels[2])
157#define tunnels_l (tunnels[1])
158#define tunnels_wc (tunnels[0])
159
160static DEFINE_RWLOCK(ipgre_lock);
161
162/* Given src, dst and key, find appropriate for input tunnel. */
163
f57e7d5a
PE
164static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
165 __be32 remote, __be32 local, __be32 key)
1da177e4
LT
166{
167 unsigned h0 = HASH(remote);
168 unsigned h1 = HASH(key);
169 struct ip_tunnel *t;
7daa0004 170 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
171
172 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
173 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
174 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
175 return t;
176 }
177 }
178 for (t = tunnels_r[h0^h1]; t; t = t->next) {
179 if (remote == t->parms.iph.daddr) {
180 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
181 return t;
182 }
183 }
184 for (t = tunnels_l[h1]; t; t = t->next) {
185 if (local == t->parms.iph.saddr ||
f97c1e0c
JP
186 (local == t->parms.iph.daddr &&
187 ipv4_is_multicast(local))) {
1da177e4
LT
188 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
189 return t;
190 }
191 }
192 for (t = tunnels_wc[h1]; t; t = t->next) {
193 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
194 return t;
195 }
196
7daa0004
PE
197 if (ign->fb_tunnel_dev->flags&IFF_UP)
198 return netdev_priv(ign->fb_tunnel_dev);
1da177e4
LT
199 return NULL;
200}
201
f57e7d5a
PE
202static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
203 struct ip_tunnel_parm *parms)
1da177e4 204{
5056a1ef
YH
205 __be32 remote = parms->iph.daddr;
206 __be32 local = parms->iph.saddr;
207 __be32 key = parms->i_key;
1da177e4
LT
208 unsigned h = HASH(key);
209 int prio = 0;
210
211 if (local)
212 prio |= 1;
f97c1e0c 213 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
214 prio |= 2;
215 h ^= HASH(remote);
216 }
217
218 return &tunnels[prio][h];
219}
220
f57e7d5a
PE
221static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
222 struct ip_tunnel *t)
5056a1ef 223{
f57e7d5a 224 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
225}
226
f57e7d5a 227static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 228{
f57e7d5a 229 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
230
231 t->next = *tp;
232 write_lock_bh(&ipgre_lock);
233 *tp = t;
234 write_unlock_bh(&ipgre_lock);
235}
236
f57e7d5a 237static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
238{
239 struct ip_tunnel **tp;
240
f57e7d5a 241 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
242 if (t == *tp) {
243 write_lock_bh(&ipgre_lock);
244 *tp = t->next;
245 write_unlock_bh(&ipgre_lock);
246 break;
247 }
248 }
249}
250
f57e7d5a
PE
251static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
252 struct ip_tunnel_parm *parms, int create)
1da177e4 253{
d5a0a1e3
AV
254 __be32 remote = parms->iph.daddr;
255 __be32 local = parms->iph.saddr;
256 __be32 key = parms->i_key;
1da177e4
LT
257 struct ip_tunnel *t, **tp, *nt;
258 struct net_device *dev;
1da177e4 259 char name[IFNAMSIZ];
f57e7d5a 260 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 261
f57e7d5a 262 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
263 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
264 if (key == t->parms.i_key)
265 return t;
266 }
267 }
268 if (!create)
269 return NULL;
270
271 if (parms->name[0])
272 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
273 else
274 sprintf(name, "gre%%d");
1da177e4
LT
275
276 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
277 if (!dev)
278 return NULL;
279
b37d428b
PE
280 if (strchr(name, '%')) {
281 if (dev_alloc_name(dev, name) < 0)
282 goto failed_free;
283 }
284
1da177e4 285 dev->init = ipgre_tunnel_init;
2941a486 286 nt = netdev_priv(dev);
1da177e4
LT
287 nt->parms = *parms;
288
b37d428b
PE
289 if (register_netdevice(dev) < 0)
290 goto failed_free;
1da177e4 291
1da177e4 292 dev_hold(dev);
f57e7d5a 293 ipgre_tunnel_link(ign, nt);
1da177e4
LT
294 return nt;
295
b37d428b
PE
296failed_free:
297 free_netdev(dev);
1da177e4
LT
298 return NULL;
299}
300
301static void ipgre_tunnel_uninit(struct net_device *dev)
302{
f57e7d5a
PE
303 struct net *net = dev_net(dev);
304 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
305
306 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
307 dev_put(dev);
308}
309
310
311static void ipgre_err(struct sk_buff *skb, u32 info)
312{
313#ifndef I_WISH_WORLD_WERE_PERFECT
314
315/* It is not :-( All the routers (except for Linux) return only
316 8 bytes of packet payload. It means, that precise relaying of
317 ICMP in the real Internet is absolutely infeasible.
318
319 Moreover, Cisco "wise men" put GRE key to the third word
320 in GRE header. It makes impossible maintaining even soft state for keyed
321 GRE tunnels with enabled checksum. Tell them "thank you".
322
323 Well, I wonder, rfc1812 was written by Cisco employee,
324 what the hell these idiots break standrads established
325 by themself???
326 */
327
328 struct iphdr *iph = (struct iphdr*)skb->data;
d5a0a1e3 329 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 330 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
331 const int type = icmp_hdr(skb)->type;
332 const int code = icmp_hdr(skb)->code;
1da177e4 333 struct ip_tunnel *t;
d5a0a1e3 334 __be16 flags;
1da177e4
LT
335
336 flags = p[0];
337 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
338 if (flags&(GRE_VERSION|GRE_ROUTING))
339 return;
340 if (flags&GRE_KEY) {
341 grehlen += 4;
342 if (flags&GRE_CSUM)
343 grehlen += 4;
344 }
345 }
346
347 /* If only 8 bytes returned, keyed message will be dropped here */
348 if (skb_headlen(skb) < grehlen)
349 return;
350
351 switch (type) {
352 default:
353 case ICMP_PARAMETERPROB:
354 return;
355
356 case ICMP_DEST_UNREACH:
357 switch (code) {
358 case ICMP_SR_FAILED:
359 case ICMP_PORT_UNREACH:
360 /* Impossible event. */
361 return;
362 case ICMP_FRAG_NEEDED:
363 /* Soft state for pmtu is maintained by IP core. */
364 return;
365 default:
366 /* All others are translated to HOST_UNREACH.
367 rfc2003 contains "deep thoughts" about NET_UNREACH,
368 I believe they are just ether pollution. --ANK
369 */
370 break;
371 }
372 break;
373 case ICMP_TIME_EXCEEDED:
374 if (code != ICMP_EXC_TTL)
375 return;
376 break;
377 }
378
379 read_lock(&ipgre_lock);
3b4667f3 380 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
f57e7d5a
PE
381 (flags&GRE_KEY) ?
382 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
f97c1e0c
JP
383 if (t == NULL || t->parms.iph.daddr == 0 ||
384 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
385 goto out;
386
387 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
388 goto out;
389
390 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
391 t->err_count++;
392 else
393 t->err_count = 1;
394 t->err_time = jiffies;
395out:
396 read_unlock(&ipgre_lock);
397 return;
398#else
399 struct iphdr *iph = (struct iphdr*)dp;
400 struct iphdr *eiph;
d5a0a1e3 401 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
88c7664f
ACM
402 const int type = icmp_hdr(skb)->type;
403 const int code = icmp_hdr(skb)->code;
1da177e4
LT
404 int rel_type = 0;
405 int rel_code = 0;
c55e2f49
AV
406 __be32 rel_info = 0;
407 __u32 n = 0;
d5a0a1e3 408 __be16 flags;
1da177e4
LT
409 int grehlen = (iph->ihl<<2) + 4;
410 struct sk_buff *skb2;
411 struct flowi fl;
412 struct rtable *rt;
413
414 if (p[1] != htons(ETH_P_IP))
415 return;
416
417 flags = p[0];
418 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
419 if (flags&(GRE_VERSION|GRE_ROUTING))
420 return;
421 if (flags&GRE_CSUM)
422 grehlen += 4;
423 if (flags&GRE_KEY)
424 grehlen += 4;
425 if (flags&GRE_SEQ)
426 grehlen += 4;
427 }
428 if (len < grehlen + sizeof(struct iphdr))
429 return;
430 eiph = (struct iphdr*)(dp + grehlen);
431
432 switch (type) {
433 default:
434 return;
435 case ICMP_PARAMETERPROB:
88c7664f 436 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
c55e2f49 437 if (n < (iph->ihl<<2))
1da177e4
LT
438 return;
439
440 /* So... This guy found something strange INSIDE encapsulated
441 packet. Well, he is fool, but what can we do ?
442 */
443 rel_type = ICMP_PARAMETERPROB;
c55e2f49
AV
444 n -= grehlen;
445 rel_info = htonl(n << 24);
1da177e4
LT
446 break;
447
448 case ICMP_DEST_UNREACH:
449 switch (code) {
450 case ICMP_SR_FAILED:
451 case ICMP_PORT_UNREACH:
452 /* Impossible event. */
453 return;
454 case ICMP_FRAG_NEEDED:
455 /* And it is the only really necessary thing :-) */
88c7664f 456 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
c55e2f49 457 if (n < grehlen+68)
1da177e4 458 return;
c55e2f49 459 n -= grehlen;
1da177e4 460 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
c55e2f49 461 if (n > ntohs(eiph->tot_len))
1da177e4 462 return;
c55e2f49 463 rel_info = htonl(n);
1da177e4
LT
464 break;
465 default:
466 /* All others are translated to HOST_UNREACH.
467 rfc2003 contains "deep thoughts" about NET_UNREACH,
468 I believe, it is just ether pollution. --ANK
469 */
470 rel_type = ICMP_DEST_UNREACH;
471 rel_code = ICMP_HOST_UNREACH;
472 break;
473 }
474 break;
475 case ICMP_TIME_EXCEEDED:
476 if (code != ICMP_EXC_TTL)
477 return;
478 break;
479 }
480
481 /* Prepare fake skb to feed it to icmp_send */
482 skb2 = skb_clone(skb, GFP_ATOMIC);
483 if (skb2 == NULL)
484 return;
485 dst_release(skb2->dst);
486 skb2->dst = NULL;
487 skb_pull(skb2, skb->data - (u8*)eiph);
c1d2bbe1 488 skb_reset_network_header(skb2);
1da177e4
LT
489
490 /* Try to guess incoming interface */
491 memset(&fl, 0, sizeof(fl));
492 fl.fl4_dst = eiph->saddr;
493 fl.fl4_tos = RT_TOS(eiph->tos);
494 fl.proto = IPPROTO_GRE;
f206351a 495 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
496 kfree_skb(skb2);
497 return;
498 }
499 skb2->dev = rt->u.dst.dev;
500
501 /* route "incoming" packet */
502 if (rt->rt_flags&RTCF_LOCAL) {
503 ip_rt_put(rt);
504 rt = NULL;
505 fl.fl4_dst = eiph->daddr;
506 fl.fl4_src = eiph->saddr;
507 fl.fl4_tos = eiph->tos;
f206351a 508 if (ip_route_output_key(&init_net, &rt, &fl) ||
1da177e4
LT
509 rt->u.dst.dev->type != ARPHRD_IPGRE) {
510 ip_rt_put(rt);
511 kfree_skb(skb2);
512 return;
513 }
514 } else {
515 ip_rt_put(rt);
516 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
517 skb2->dst->dev->type != ARPHRD_IPGRE) {
518 kfree_skb(skb2);
519 return;
520 }
521 }
522
523 /* change mtu on this route */
524 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
c55e2f49 525 if (n > dst_mtu(skb2->dst)) {
1da177e4
LT
526 kfree_skb(skb2);
527 return;
528 }
c55e2f49 529 skb2->dst->ops->update_pmtu(skb2->dst, n);
1da177e4 530 } else if (type == ICMP_TIME_EXCEEDED) {
2941a486 531 struct ip_tunnel *t = netdev_priv(skb2->dev);
1da177e4
LT
532 if (t->parms.iph.ttl) {
533 rel_type = ICMP_DEST_UNREACH;
534 rel_code = ICMP_HOST_UNREACH;
535 }
536 }
537
538 icmp_send(skb2, rel_type, rel_code, rel_info);
539 kfree_skb(skb2);
540#endif
541}
542
543static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
544{
545 if (INET_ECN_is_ce(iph->tos)) {
546 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 547 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 548 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 549 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
550 }
551 }
552}
553
554static inline u8
555ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
556{
557 u8 inner = 0;
558 if (skb->protocol == htons(ETH_P_IP))
559 inner = old_iph->tos;
560 else if (skb->protocol == htons(ETH_P_IPV6))
561 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
562 return INET_ECN_encapsulate(tos, inner);
563}
564
565static int ipgre_rcv(struct sk_buff *skb)
566{
567 struct iphdr *iph;
568 u8 *h;
d5a0a1e3 569 __be16 flags;
d3bc23e7 570 __sum16 csum = 0;
d5a0a1e3 571 __be32 key = 0;
1da177e4
LT
572 u32 seqno = 0;
573 struct ip_tunnel *tunnel;
574 int offset = 4;
575
576 if (!pskb_may_pull(skb, 16))
577 goto drop_nolock;
578
eddc9ec5 579 iph = ip_hdr(skb);
1da177e4 580 h = skb->data;
d5a0a1e3 581 flags = *(__be16*)h;
1da177e4
LT
582
583 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
584 /* - Version must be 0.
585 - We do not support routing headers.
586 */
587 if (flags&(GRE_VERSION|GRE_ROUTING))
588 goto drop_nolock;
589
590 if (flags&GRE_CSUM) {
fb286bb2 591 switch (skb->ip_summed) {
84fa7933 592 case CHECKSUM_COMPLETE:
d3bc23e7 593 csum = csum_fold(skb->csum);
fb286bb2
HX
594 if (!csum)
595 break;
596 /* fall through */
597 case CHECKSUM_NONE:
598 skb->csum = 0;
599 csum = __skb_checksum_complete(skb);
84fa7933 600 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
601 }
602 offset += 4;
603 }
604 if (flags&GRE_KEY) {
d5a0a1e3 605 key = *(__be32*)(h + offset);
1da177e4
LT
606 offset += 4;
607 }
608 if (flags&GRE_SEQ) {
d5a0a1e3 609 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
610 offset += 4;
611 }
612 }
613
614 read_lock(&ipgre_lock);
3b4667f3 615 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
f57e7d5a 616 iph->saddr, iph->daddr, key)) != NULL) {
1da177e4
LT
617 secpath_reset(skb);
618
d5a0a1e3 619 skb->protocol = *(__be16*)(h + 2);
1da177e4
LT
620 /* WCCP version 1 and 2 protocol decoding.
621 * - Change protocol to IP
622 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
623 */
624 if (flags == 0 &&
496c98df
YH
625 skb->protocol == htons(ETH_P_WCCP)) {
626 skb->protocol = htons(ETH_P_IP);
e905a9ed 627 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
628 offset += 4;
629 }
630
1d069167 631 skb->mac_header = skb->network_header;
4209fb60
ACM
632 __pskb_pull(skb, offset);
633 skb_reset_network_header(skb);
9c70220b 634 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
635 skb->pkt_type = PACKET_HOST;
636#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 637 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 638 /* Looped back packet, drop it! */
ee6b9673 639 if (skb->rtable->fl.iif == 0)
1da177e4
LT
640 goto drop;
641 tunnel->stat.multicast++;
642 skb->pkt_type = PACKET_BROADCAST;
643 }
644#endif
645
646 if (((flags&GRE_CSUM) && csum) ||
647 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
648 tunnel->stat.rx_crc_errors++;
649 tunnel->stat.rx_errors++;
650 goto drop;
651 }
652 if (tunnel->parms.i_flags&GRE_SEQ) {
653 if (!(flags&GRE_SEQ) ||
654 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
655 tunnel->stat.rx_fifo_errors++;
656 tunnel->stat.rx_errors++;
657 goto drop;
658 }
659 tunnel->i_seqno = seqno + 1;
660 }
661 tunnel->stat.rx_packets++;
662 tunnel->stat.rx_bytes += skb->len;
663 skb->dev = tunnel->dev;
664 dst_release(skb->dst);
665 skb->dst = NULL;
666 nf_reset(skb);
667 ipgre_ecn_decapsulate(iph, skb);
668 netif_rx(skb);
669 read_unlock(&ipgre_lock);
670 return(0);
671 }
45af08be 672 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
673
674drop:
675 read_unlock(&ipgre_lock);
676drop_nolock:
677 kfree_skb(skb);
678 return(0);
679}
680
681static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
682{
2941a486 683 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 684 struct net_device_stats *stats = &tunnel->stat;
eddc9ec5 685 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
686 struct iphdr *tiph;
687 u8 tos;
d5a0a1e3 688 __be16 df;
1da177e4
LT
689 struct rtable *rt; /* Route to the other host */
690 struct net_device *tdev; /* Device to other host */
691 struct iphdr *iph; /* Our new IP header */
c2636b4d 692 unsigned int max_headroom; /* The extra header space needed */
1da177e4 693 int gre_hlen;
d5a0a1e3 694 __be32 dst;
1da177e4
LT
695 int mtu;
696
697 if (tunnel->recursion++) {
698 tunnel->stat.collisions++;
699 goto tx_error;
700 }
701
3b04ddde 702 if (dev->header_ops) {
1da177e4
LT
703 gre_hlen = 0;
704 tiph = (struct iphdr*)skb->data;
705 } else {
706 gre_hlen = tunnel->hlen;
707 tiph = &tunnel->parms.iph;
708 }
709
710 if ((dst = tiph->daddr) == 0) {
711 /* NBMA tunnel */
712
713 if (skb->dst == NULL) {
714 tunnel->stat.tx_fifo_errors++;
715 goto tx_error;
716 }
717
718 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 719 rt = skb->rtable;
1da177e4
LT
720 if ((dst = rt->rt_gateway) == 0)
721 goto tx_error_icmp;
722 }
723#ifdef CONFIG_IPV6
724 else if (skb->protocol == htons(ETH_P_IPV6)) {
725 struct in6_addr *addr6;
726 int addr_type;
727 struct neighbour *neigh = skb->dst->neighbour;
728
729 if (neigh == NULL)
730 goto tx_error;
731
732 addr6 = (struct in6_addr*)&neigh->primary_key;
733 addr_type = ipv6_addr_type(addr6);
734
735 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 736 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
737 addr_type = ipv6_addr_type(addr6);
738 }
739
740 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
741 goto tx_error_icmp;
742
743 dst = addr6->s6_addr32[3];
744 }
745#endif
746 else
747 goto tx_error;
748 }
749
750 tos = tiph->tos;
751 if (tos&1) {
752 if (skb->protocol == htons(ETH_P_IP))
753 tos = old_iph->tos;
754 tos &= ~1;
755 }
756
757 {
758 struct flowi fl = { .oif = tunnel->parms.link,
759 .nl_u = { .ip4_u =
760 { .daddr = dst,
761 .saddr = tiph->saddr,
762 .tos = RT_TOS(tos) } },
763 .proto = IPPROTO_GRE };
f206351a 764 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
765 tunnel->stat.tx_carrier_errors++;
766 goto tx_error;
767 }
768 }
769 tdev = rt->u.dst.dev;
770
771 if (tdev == dev) {
772 ip_rt_put(rt);
773 tunnel->stat.collisions++;
774 goto tx_error;
775 }
776
777 df = tiph->frag_off;
778 if (df)
779 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
780 else
781 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
782
783 if (skb->dst)
784 skb->dst->ops->update_pmtu(skb->dst, mtu);
785
786 if (skb->protocol == htons(ETH_P_IP)) {
787 df |= (old_iph->frag_off&htons(IP_DF));
788
789 if ((old_iph->frag_off&htons(IP_DF)) &&
790 mtu < ntohs(old_iph->tot_len)) {
791 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
792 ip_rt_put(rt);
793 goto tx_error;
794 }
795 }
796#ifdef CONFIG_IPV6
797 else if (skb->protocol == htons(ETH_P_IPV6)) {
798 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
799
800 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
801 if ((tunnel->parms.iph.daddr &&
802 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
803 rt6->rt6i_dst.plen == 128) {
804 rt6->rt6i_flags |= RTF_MODIFIED;
805 skb->dst->metrics[RTAX_MTU-1] = mtu;
806 }
807 }
808
809 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
810 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
811 ip_rt_put(rt);
812 goto tx_error;
813 }
814 }
815#endif
816
817 if (tunnel->err_count > 0) {
818 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
819 tunnel->err_count--;
820
821 dst_link_failure(skb);
822 } else
823 tunnel->err_count = 0;
824 }
825
826 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
827
cfbba49d
PM
828 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
829 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
830 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
831 if (!new_skb) {
832 ip_rt_put(rt);
e905a9ed 833 stats->tx_dropped++;
1da177e4
LT
834 dev_kfree_skb(skb);
835 tunnel->recursion--;
836 return 0;
837 }
838 if (skb->sk)
839 skb_set_owner_w(new_skb, skb->sk);
840 dev_kfree_skb(skb);
841 skb = new_skb;
eddc9ec5 842 old_iph = ip_hdr(skb);
1da177e4
LT
843 }
844
b0e380b1 845 skb->transport_header = skb->network_header;
e2d1bca7
ACM
846 skb_push(skb, gre_hlen);
847 skb_reset_network_header(skb);
1da177e4 848 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
849 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
850 IPSKB_REROUTED);
1da177e4
LT
851 dst_release(skb->dst);
852 skb->dst = &rt->u.dst;
853
854 /*
855 * Push down and install the IPIP header.
856 */
857
eddc9ec5 858 iph = ip_hdr(skb);
1da177e4
LT
859 iph->version = 4;
860 iph->ihl = sizeof(struct iphdr) >> 2;
861 iph->frag_off = df;
862 iph->protocol = IPPROTO_GRE;
863 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
864 iph->daddr = rt->rt_dst;
865 iph->saddr = rt->rt_src;
866
867 if ((iph->ttl = tiph->ttl) == 0) {
868 if (skb->protocol == htons(ETH_P_IP))
869 iph->ttl = old_iph->ttl;
870#ifdef CONFIG_IPV6
871 else if (skb->protocol == htons(ETH_P_IPV6))
872 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
873#endif
874 else
875 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
876 }
877
d5a0a1e3
AV
878 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
879 ((__be16*)(iph+1))[1] = skb->protocol;
1da177e4
LT
880
881 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 882 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
883
884 if (tunnel->parms.o_flags&GRE_SEQ) {
885 ++tunnel->o_seqno;
886 *ptr = htonl(tunnel->o_seqno);
887 ptr--;
888 }
889 if (tunnel->parms.o_flags&GRE_KEY) {
890 *ptr = tunnel->parms.o_key;
891 ptr--;
892 }
893 if (tunnel->parms.o_flags&GRE_CSUM) {
894 *ptr = 0;
5f92a738 895 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
896 }
897 }
898
899 nf_reset(skb);
900
901 IPTUNNEL_XMIT();
902 tunnel->recursion--;
903 return 0;
904
905tx_error_icmp:
906 dst_link_failure(skb);
907
908tx_error:
909 stats->tx_errors++;
910 dev_kfree_skb(skb);
911 tunnel->recursion--;
912 return 0;
913}
914
ee34c1eb
MS
915static void ipgre_tunnel_bind_dev(struct net_device *dev)
916{
917 struct net_device *tdev = NULL;
918 struct ip_tunnel *tunnel;
919 struct iphdr *iph;
920 int hlen = LL_MAX_HEADER;
921 int mtu = ETH_DATA_LEN;
922 int addend = sizeof(struct iphdr) + 4;
923
924 tunnel = netdev_priv(dev);
925 iph = &tunnel->parms.iph;
926
927 /* Guess output device to choose reasonable mtu and hard_header_len */
928
929 if (iph->daddr) {
930 struct flowi fl = { .oif = tunnel->parms.link,
931 .nl_u = { .ip4_u =
932 { .daddr = iph->daddr,
933 .saddr = iph->saddr,
934 .tos = RT_TOS(iph->tos) } },
935 .proto = IPPROTO_GRE };
936 struct rtable *rt;
f206351a 937 if (!ip_route_output_key(&init_net, &rt, &fl)) {
ee34c1eb
MS
938 tdev = rt->u.dst.dev;
939 ip_rt_put(rt);
940 }
941 dev->flags |= IFF_POINTOPOINT;
942 }
943
944 if (!tdev && tunnel->parms.link)
945 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
946
947 if (tdev) {
948 hlen = tdev->hard_header_len;
949 mtu = tdev->mtu;
950 }
951 dev->iflink = tunnel->parms.link;
952
953 /* Precalculate GRE options length */
954 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
955 if (tunnel->parms.o_flags&GRE_CSUM)
956 addend += 4;
957 if (tunnel->parms.o_flags&GRE_KEY)
958 addend += 4;
959 if (tunnel->parms.o_flags&GRE_SEQ)
960 addend += 4;
961 }
962 dev->hard_header_len = hlen + addend;
963 dev->mtu = mtu - addend;
964 tunnel->hlen = addend;
965
966}
967
1da177e4
LT
968static int
969ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
970{
971 int err = 0;
972 struct ip_tunnel_parm p;
973 struct ip_tunnel *t;
f57e7d5a
PE
974 struct net *net = dev_net(dev);
975 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
976
977 switch (cmd) {
978 case SIOCGETTUNNEL:
979 t = NULL;
7daa0004 980 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
981 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
982 err = -EFAULT;
983 break;
984 }
f57e7d5a 985 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
986 }
987 if (t == NULL)
2941a486 988 t = netdev_priv(dev);
1da177e4
LT
989 memcpy(&p, &t->parms, sizeof(p));
990 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
991 err = -EFAULT;
992 break;
993
994 case SIOCADDTUNNEL:
995 case SIOCCHGTUNNEL:
996 err = -EPERM;
997 if (!capable(CAP_NET_ADMIN))
998 goto done;
999
1000 err = -EFAULT;
1001 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1002 goto done;
1003
1004 err = -EINVAL;
1005 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1006 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1007 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1008 goto done;
1009 if (p.iph.ttl)
1010 p.iph.frag_off |= htons(IP_DF);
1011
1012 if (!(p.i_flags&GRE_KEY))
1013 p.i_key = 0;
1014 if (!(p.o_flags&GRE_KEY))
1015 p.o_key = 0;
1016
f57e7d5a 1017 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1018
7daa0004 1019 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1020 if (t != NULL) {
1021 if (t->dev != dev) {
1022 err = -EEXIST;
1023 break;
1024 }
1025 } else {
1026 unsigned nflags=0;
1027
2941a486 1028 t = netdev_priv(dev);
1da177e4 1029
f97c1e0c 1030 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1031 nflags = IFF_BROADCAST;
1032 else if (p.iph.daddr)
1033 nflags = IFF_POINTOPOINT;
1034
1035 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1036 err = -EINVAL;
1037 break;
1038 }
f57e7d5a 1039 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1040 t->parms.iph.saddr = p.iph.saddr;
1041 t->parms.iph.daddr = p.iph.daddr;
1042 t->parms.i_key = p.i_key;
1043 t->parms.o_key = p.o_key;
1044 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1045 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1046 ipgre_tunnel_link(ign, t);
1da177e4
LT
1047 netdev_state_change(dev);
1048 }
1049 }
1050
1051 if (t) {
1052 err = 0;
1053 if (cmd == SIOCCHGTUNNEL) {
1054 t->parms.iph.ttl = p.iph.ttl;
1055 t->parms.iph.tos = p.iph.tos;
1056 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1057 if (t->parms.link != p.link) {
1058 t->parms.link = p.link;
1059 ipgre_tunnel_bind_dev(dev);
1060 netdev_state_change(dev);
1061 }
1da177e4
LT
1062 }
1063 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1064 err = -EFAULT;
1065 } else
1066 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1067 break;
1068
1069 case SIOCDELTUNNEL:
1070 err = -EPERM;
1071 if (!capable(CAP_NET_ADMIN))
1072 goto done;
1073
7daa0004 1074 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1075 err = -EFAULT;
1076 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1077 goto done;
1078 err = -ENOENT;
f57e7d5a 1079 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1080 goto done;
1081 err = -EPERM;
7daa0004 1082 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1083 goto done;
1084 dev = t->dev;
1085 }
22f8cde5
SH
1086 unregister_netdevice(dev);
1087 err = 0;
1da177e4
LT
1088 break;
1089
1090 default:
1091 err = -EINVAL;
1092 }
1093
1094done:
1095 return err;
1096}
1097
1098static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1099{
2941a486 1100 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1da177e4
LT
1101}
1102
1103static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1104{
2941a486 1105 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
1106 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1107 return -EINVAL;
1108 dev->mtu = new_mtu;
1109 return 0;
1110}
1111
1da177e4
LT
1112/* Nice toy. Unfortunately, useless in real life :-)
1113 It allows to construct virtual multiprotocol broadcast "LAN"
1114 over the Internet, provided multicast routing is tuned.
1115
1116
1117 I have no idea whether this bicycle was invented before me,
1118 so I had to set ARPHRD_IPGRE to a random value.
1119 I have the impression that Cisco could have made something similar,
1120 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1121
1da177e4
LT
1122 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1123 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1124
1125 ping -t 255 224.66.66.66
1126
1127 If nobody answers, mbone does not work.
1128
1129 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1130 ip addr add 10.66.66.<somewhat>/24 dev Universe
1131 ifconfig Universe up
1132 ifconfig Universe add fe80::<Your_real_addr>/10
1133 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1134 ftp 10.66.66.66
1135 ...
1136 ftp fec0:6666:6666::193.233.7.65
1137 ...
1138
1139 */
1140
3b04ddde
SH
1141static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1142 unsigned short type,
1143 const void *daddr, const void *saddr, unsigned len)
1da177e4 1144{
2941a486 1145 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1146 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1147 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1148
1149 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1150 p[0] = t->parms.o_flags;
1151 p[1] = htons(type);
1152
1153 /*
e905a9ed 1154 * Set the source hardware address.
1da177e4 1155 */
e905a9ed 1156
1da177e4
LT
1157 if (saddr)
1158 memcpy(&iph->saddr, saddr, 4);
1159
1160 if (daddr) {
1161 memcpy(&iph->daddr, daddr, 4);
1162 return t->hlen;
1163 }
f97c1e0c 1164 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1165 return t->hlen;
e905a9ed 1166
1da177e4
LT
1167 return -t->hlen;
1168}
1169
6a5f44d7
TT
1170static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1171{
1172 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1173 memcpy(haddr, &iph->saddr, 4);
1174 return 4;
1175}
1176
3b04ddde
SH
1177static const struct header_ops ipgre_header_ops = {
1178 .create = ipgre_header,
6a5f44d7 1179 .parse = ipgre_header_parse,
3b04ddde
SH
1180};
1181
6a5f44d7 1182#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1183static int ipgre_open(struct net_device *dev)
1184{
2941a486 1185 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1186
f97c1e0c 1187 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1188 struct flowi fl = { .oif = t->parms.link,
1189 .nl_u = { .ip4_u =
1190 { .daddr = t->parms.iph.daddr,
1191 .saddr = t->parms.iph.saddr,
1192 .tos = RT_TOS(t->parms.iph.tos) } },
1193 .proto = IPPROTO_GRE };
1194 struct rtable *rt;
f206351a 1195 if (ip_route_output_key(&init_net, &rt, &fl))
1da177e4
LT
1196 return -EADDRNOTAVAIL;
1197 dev = rt->u.dst.dev;
1198 ip_rt_put(rt);
e5ed6399 1199 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1200 return -EADDRNOTAVAIL;
1201 t->mlink = dev->ifindex;
e5ed6399 1202 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1203 }
1204 return 0;
1205}
1206
1207static int ipgre_close(struct net_device *dev)
1208{
2941a486 1209 struct ip_tunnel *t = netdev_priv(dev);
f97c1e0c 1210 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1211 struct in_device *in_dev;
c346dca1 1212 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1213 if (in_dev) {
1214 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1215 in_dev_put(in_dev);
1216 }
1217 }
1218 return 0;
1219}
1220
1221#endif
1222
1223static void ipgre_tunnel_setup(struct net_device *dev)
1224{
1da177e4
LT
1225 dev->uninit = ipgre_tunnel_uninit;
1226 dev->destructor = free_netdev;
1227 dev->hard_start_xmit = ipgre_tunnel_xmit;
1228 dev->get_stats = ipgre_tunnel_get_stats;
1229 dev->do_ioctl = ipgre_tunnel_ioctl;
1230 dev->change_mtu = ipgre_tunnel_change_mtu;
1231
1232 dev->type = ARPHRD_IPGRE;
1233 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1234 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1235 dev->flags = IFF_NOARP;
1236 dev->iflink = 0;
1237 dev->addr_len = 4;
1238}
1239
1240static int ipgre_tunnel_init(struct net_device *dev)
1241{
1da177e4
LT
1242 struct ip_tunnel *tunnel;
1243 struct iphdr *iph;
1da177e4 1244
2941a486 1245 tunnel = netdev_priv(dev);
1da177e4
LT
1246 iph = &tunnel->parms.iph;
1247
1248 tunnel->dev = dev;
1249 strcpy(tunnel->parms.name, dev->name);
1250
1251 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1252 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1253
ee34c1eb 1254 ipgre_tunnel_bind_dev(dev);
1da177e4
LT
1255
1256 if (iph->daddr) {
1da177e4 1257#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1258 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1259 if (!iph->saddr)
1260 return -EINVAL;
1261 dev->flags = IFF_BROADCAST;
3b04ddde 1262 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1263 dev->open = ipgre_open;
1264 dev->stop = ipgre_close;
1265 }
1266#endif
ee34c1eb 1267 } else
6a5f44d7 1268 dev->header_ops = &ipgre_header_ops;
1da177e4 1269
1da177e4
LT
1270 return 0;
1271}
1272
7daa0004 1273static int ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1274{
2941a486 1275 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
1276 struct iphdr *iph = &tunnel->parms.iph;
1277
1278 tunnel->dev = dev;
1279 strcpy(tunnel->parms.name, dev->name);
1280
1281 iph->version = 4;
1282 iph->protocol = IPPROTO_GRE;
1283 iph->ihl = 5;
1284 tunnel->hlen = sizeof(struct iphdr) + 4;
1285
1286 dev_hold(dev);
1287 tunnels_wc[0] = tunnel;
1288 return 0;
1289}
1290
1291
1292static struct net_protocol ipgre_protocol = {
1293 .handler = ipgre_rcv,
1294 .err_handler = ipgre_err,
1295};
1296
59a4c759
PE
1297static int ipgre_init_net(struct net *net)
1298{
1299 int err;
1300 struct ipgre_net *ign;
1301
1302 err = -ENOMEM;
1303 ign = kmalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1304 if (ign == NULL)
1305 goto err_alloc;
1306
1307 err = net_assign_generic(net, ipgre_net_id, ign);
1308 if (err < 0)
1309 goto err_assign;
1310
7daa0004
PE
1311 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1312 ipgre_tunnel_setup);
1313 if (!ign->fb_tunnel_dev) {
1314 err = -ENOMEM;
1315 goto err_alloc_dev;
1316 }
1317
1318 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1319 dev_net_set(ign->fb_tunnel_dev, net);
1320
1321 if ((err = register_netdev(ign->fb_tunnel_dev)))
1322 goto err_reg_dev;
1323
59a4c759
PE
1324 return 0;
1325
7daa0004
PE
1326err_reg_dev:
1327 free_netdev(ign->fb_tunnel_dev);
1328err_alloc_dev:
1329 /* nothing */
59a4c759
PE
1330err_assign:
1331 kfree(ign);
1332err_alloc:
1333 return err;
1334}
1335
1336static void ipgre_exit_net(struct net *net)
1337{
1338 struct ipgre_net *ign;
1339
1340 ign = net_generic(net, ipgre_net_id);
7daa0004
PE
1341 rtnl_lock();
1342 if (net != &init_net)
1343 unregister_netdevice(ign->fb_tunnel_dev);
1344 rtnl_unlock();
59a4c759
PE
1345 kfree(ign);
1346}
1347
1348static struct pernet_operations ipgre_net_ops = {
1349 .init = ipgre_init_net,
1350 .exit = ipgre_exit_net,
1351};
1da177e4
LT
1352
1353/*
1354 * And now the modules code and kernel interface.
1355 */
1356
1357static int __init ipgre_init(void)
1358{
1359 int err;
1360
1361 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1362
1363 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1364 printk(KERN_INFO "ipgre init: can't add protocol\n");
1365 return -EAGAIN;
1366 }
1367
59a4c759
PE
1368 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1369 if (err < 0)
7daa0004
PE
1370 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1371
1da177e4 1372 return err;
1da177e4
LT
1373}
1374
db44575f
AK
1375static void __exit ipgre_destroy_tunnels(void)
1376{
1377 int prio;
1378
1379 for (prio = 0; prio < 4; prio++) {
1380 int h;
1381 for (h = 0; h < HASH_SIZE; h++) {
1382 struct ip_tunnel *t;
1383 while ((t = tunnels[prio][h]) != NULL)
1384 unregister_netdevice(t->dev);
1385 }
1386 }
1387}
1388
1389static void __exit ipgre_fini(void)
1da177e4
LT
1390{
1391 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1392 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1393
db44575f
AK
1394 rtnl_lock();
1395 ipgre_destroy_tunnels();
1396 rtnl_unlock();
59a4c759
PE
1397
1398 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1399}
1400
1401module_init(ipgre_init);
1402module_exit(ipgre_fini);
1403MODULE_LICENSE("GPL");