]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
[GRE]: Introduce empty ipgre_net structure and net init/exit ops.
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
46f25dff 30#include <linux/if_ether.h>
1da177e4
LT
31
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
59a4c759
PE
42#include <net/net_namespace.h>
43#include <net/netns/generic.h>
1da177e4
LT
44
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
51/*
52 Problems & solutions
53 --------------------
54
55 1. The most important issue is detecting local dead loops.
56 They would cause complete host lockup in transmit, which
57 would be "resolved" by stack overflow or, if queueing is enabled,
58 with infinite looping in net_bh.
59
60 We cannot track such dead loops during route installation,
61 it is infeasible task. The most general solutions would be
62 to keep skb->encapsulation counter (sort of local ttl),
63 and silently drop packet when it expires. It is the best
64 solution, but it supposes maintaining a new variable in ALL
65 skb, even if no tunneling is used.
66
e905a9ed 67 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
68 like dev->tbusy flag, but I preferred new variable, because
69 the semantics is different. One day, when hard_start_xmit
70 will be multithreaded we will have to use skb->encapsulation.
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
82 so that this problem would be solved and traceroute output
83 would even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
91 One of them is to parse packet trying to detect inner encapsulation
92 made by our node. It is difficult or even impossible, especially,
93 taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
/*
 * Forward declarations of the device callbacks defined later in this file.
 * ipgre_tunnel_setup is handed to alloc_netdev() and ipgre_tunnel_init is
 * installed as dev->init when a new tunnel device is created.
 */
120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
122
123/* Fallback tunnel: no source, no destination, no key, no options */
124
125static int ipgre_fb_tunnel_init(struct net_device *dev);
126
59a4c759
PE
/*
 * Per-network-namespace state for GRE.  The structure is still empty at
 * this revision; ipgre_net_id is presumably the id under which it is
 * registered with the generic netns machinery (registration is not shown
 * in this part of the file).
 */
127static int ipgre_net_id;
128struct ipgre_net {
129};
130
1da177e4
LT
/* Device backing the fallback tunnel used when no configured tunnel matches. */
131static struct net_device *ipgre_fb_tunnel_dev;
132
133/* Tunnel hash table */
134
135/*
136 4 hash tables:
137
138 3: (remote,local)
139 2: (remote,*)
140 1: (*,local)
141 0: (*,*)
142
143 We require exact key match i.e. if a key is present in packet
144 it will match only tunnel with the same key; if it is not present,
145 it will match only keyless tunnel.
146
147 All keyless packets, if not matched by configured keyless tunnels,
148 will match fallback tunnel.
149 */
150
/* Number of buckets in each of the four tunnel hash tables (power of two). */
#define HASH_SIZE  16

/*
 * Fold an address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that expression arguments such
 * as HASH(a | b) hash the value of the whole expression, and the mask is
 * derived from HASH_SIZE so the two cannot drift apart.
 * Note: the argument is evaluated twice; do not pass expressions with
 * side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1da177e4
LT
153
/*
 * The four hash tables described above, indexed by match priority:
 * bit 1 set = remote address configured, bit 0 set = local address
 * configured.  Each bucket is a singly linked list chained via ->next.
 */
154static struct ip_tunnel *tunnels[4][HASH_SIZE];
155
/* Convenience names for the individual tables. */
156#define tunnels_r_l (tunnels[3])
157#define tunnels_r (tunnels[2])
158#define tunnels_l (tunnels[1])
159#define tunnels_wc (tunnels[0])
160
/*
 * Readers (ipgre_rcv, ipgre_err) take this for reading around lookups;
 * link/unlink take it for writing when they update a bucket pointer.
 */
161static DEFINE_RWLOCK(ipgre_lock);
162
163/* Given src, dst and key, find appropriate for input tunnel. */
164
d5a0a1e3 165static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
1da177e4
LT
166{
167 unsigned h0 = HASH(remote);
168 unsigned h1 = HASH(key);
169 struct ip_tunnel *t;
170
171 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
172 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
173 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
174 return t;
175 }
176 }
177 for (t = tunnels_r[h0^h1]; t; t = t->next) {
178 if (remote == t->parms.iph.daddr) {
179 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
180 return t;
181 }
182 }
183 for (t = tunnels_l[h1]; t; t = t->next) {
184 if (local == t->parms.iph.saddr ||
f97c1e0c
JP
185 (local == t->parms.iph.daddr &&
186 ipv4_is_multicast(local))) {
1da177e4
LT
187 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
188 return t;
189 }
190 }
191 for (t = tunnels_wc[h1]; t; t = t->next) {
192 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
193 return t;
194 }
195
196 if (ipgre_fb_tunnel_dev->flags&IFF_UP)
2941a486 197 return netdev_priv(ipgre_fb_tunnel_dev);
1da177e4
LT
198 return NULL;
199}
200
5056a1ef 201static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
1da177e4 202{
5056a1ef
YH
203 __be32 remote = parms->iph.daddr;
204 __be32 local = parms->iph.saddr;
205 __be32 key = parms->i_key;
1da177e4
LT
206 unsigned h = HASH(key);
207 int prio = 0;
208
209 if (local)
210 prio |= 1;
f97c1e0c 211 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
212 prio |= 2;
213 h ^= HASH(remote);
214 }
215
216 return &tunnels[prio][h];
217}
218
5056a1ef
YH
219static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
220{
221 return __ipgre_bucket(&t->parms);
222}
223
1da177e4
LT
224static void ipgre_tunnel_link(struct ip_tunnel *t)
225{
226 struct ip_tunnel **tp = ipgre_bucket(t);
227
228 t->next = *tp;
229 write_lock_bh(&ipgre_lock);
230 *tp = t;
231 write_unlock_bh(&ipgre_lock);
232}
233
234static void ipgre_tunnel_unlink(struct ip_tunnel *t)
235{
236 struct ip_tunnel **tp;
237
238 for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
239 if (t == *tp) {
240 write_lock_bh(&ipgre_lock);
241 *tp = t->next;
242 write_unlock_bh(&ipgre_lock);
243 break;
244 }
245 }
246}
247
248static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
249{
d5a0a1e3
AV
250 __be32 remote = parms->iph.daddr;
251 __be32 local = parms->iph.saddr;
252 __be32 key = parms->i_key;
1da177e4
LT
253 struct ip_tunnel *t, **tp, *nt;
254 struct net_device *dev;
1da177e4
LT
255 char name[IFNAMSIZ];
256
5056a1ef 257 for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
258 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
259 if (key == t->parms.i_key)
260 return t;
261 }
262 }
263 if (!create)
264 return NULL;
265
266 if (parms->name[0])
267 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
268 else
269 sprintf(name, "gre%%d");
1da177e4
LT
270
271 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
272 if (!dev)
273 return NULL;
274
b37d428b
PE
275 if (strchr(name, '%')) {
276 if (dev_alloc_name(dev, name) < 0)
277 goto failed_free;
278 }
279
1da177e4 280 dev->init = ipgre_tunnel_init;
2941a486 281 nt = netdev_priv(dev);
1da177e4
LT
282 nt->parms = *parms;
283
b37d428b
PE
284 if (register_netdevice(dev) < 0)
285 goto failed_free;
1da177e4 286
1da177e4
LT
287 dev_hold(dev);
288 ipgre_tunnel_link(nt);
1da177e4
LT
289 return nt;
290
b37d428b
PE
291failed_free:
292 free_netdev(dev);
1da177e4
LT
293 return NULL;
294}
295
/*
 * Device teardown hook: take the tunnel out of its hash bucket and drop
 * the device reference taken with dev_hold() in ipgre_tunnel_locate().
 */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	ipgre_tunnel_unlink(tunnel);
	dev_put(dev);
}
301
302
303static void ipgre_err(struct sk_buff *skb, u32 info)
304{
305#ifndef I_WISH_WORLD_WERE_PERFECT
306
307/* It is not :-( All the routers (except for Linux) return only
308 8 bytes of packet payload. It means, that precise relaying of
309 ICMP in the real Internet is absolutely infeasible.
310
311 Moreover, Cisco "wise men" put GRE key to the third word
312 in GRE header. It makes impossible maintaining even soft state for keyed
313 GRE tunnels with enabled checksum. Tell them "thank you".
314
315 Well, I wonder, rfc1812 was written by Cisco employee,
316 what the hell these idiots break standrads established
317 by themself???
318 */
319
320 struct iphdr *iph = (struct iphdr*)skb->data;
d5a0a1e3 321 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 322 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
323 const int type = icmp_hdr(skb)->type;
324 const int code = icmp_hdr(skb)->code;
1da177e4 325 struct ip_tunnel *t;
d5a0a1e3 326 __be16 flags;
1da177e4
LT
327
328 flags = p[0];
329 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
330 if (flags&(GRE_VERSION|GRE_ROUTING))
331 return;
332 if (flags&GRE_KEY) {
333 grehlen += 4;
334 if (flags&GRE_CSUM)
335 grehlen += 4;
336 }
337 }
338
339 /* If only 8 bytes returned, keyed message will be dropped here */
340 if (skb_headlen(skb) < grehlen)
341 return;
342
343 switch (type) {
344 default:
345 case ICMP_PARAMETERPROB:
346 return;
347
348 case ICMP_DEST_UNREACH:
349 switch (code) {
350 case ICMP_SR_FAILED:
351 case ICMP_PORT_UNREACH:
352 /* Impossible event. */
353 return;
354 case ICMP_FRAG_NEEDED:
355 /* Soft state for pmtu is maintained by IP core. */
356 return;
357 default:
358 /* All others are translated to HOST_UNREACH.
359 rfc2003 contains "deep thoughts" about NET_UNREACH,
360 I believe they are just ether pollution. --ANK
361 */
362 break;
363 }
364 break;
365 case ICMP_TIME_EXCEEDED:
366 if (code != ICMP_EXC_TTL)
367 return;
368 break;
369 }
370
371 read_lock(&ipgre_lock);
d5a0a1e3 372 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
f97c1e0c
JP
373 if (t == NULL || t->parms.iph.daddr == 0 ||
374 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
375 goto out;
376
377 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
378 goto out;
379
380 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
381 t->err_count++;
382 else
383 t->err_count = 1;
384 t->err_time = jiffies;
385out:
386 read_unlock(&ipgre_lock);
387 return;
388#else
389 struct iphdr *iph = (struct iphdr*)dp;
390 struct iphdr *eiph;
d5a0a1e3 391 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
88c7664f
ACM
392 const int type = icmp_hdr(skb)->type;
393 const int code = icmp_hdr(skb)->code;
1da177e4
LT
394 int rel_type = 0;
395 int rel_code = 0;
c55e2f49
AV
396 __be32 rel_info = 0;
397 __u32 n = 0;
d5a0a1e3 398 __be16 flags;
1da177e4
LT
399 int grehlen = (iph->ihl<<2) + 4;
400 struct sk_buff *skb2;
401 struct flowi fl;
402 struct rtable *rt;
403
404 if (p[1] != htons(ETH_P_IP))
405 return;
406
407 flags = p[0];
408 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
409 if (flags&(GRE_VERSION|GRE_ROUTING))
410 return;
411 if (flags&GRE_CSUM)
412 grehlen += 4;
413 if (flags&GRE_KEY)
414 grehlen += 4;
415 if (flags&GRE_SEQ)
416 grehlen += 4;
417 }
418 if (len < grehlen + sizeof(struct iphdr))
419 return;
420 eiph = (struct iphdr*)(dp + grehlen);
421
422 switch (type) {
423 default:
424 return;
425 case ICMP_PARAMETERPROB:
88c7664f 426 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
c55e2f49 427 if (n < (iph->ihl<<2))
1da177e4
LT
428 return;
429
430 /* So... This guy found something strange INSIDE encapsulated
431 packet. Well, he is fool, but what can we do ?
432 */
433 rel_type = ICMP_PARAMETERPROB;
c55e2f49
AV
434 n -= grehlen;
435 rel_info = htonl(n << 24);
1da177e4
LT
436 break;
437
438 case ICMP_DEST_UNREACH:
439 switch (code) {
440 case ICMP_SR_FAILED:
441 case ICMP_PORT_UNREACH:
442 /* Impossible event. */
443 return;
444 case ICMP_FRAG_NEEDED:
445 /* And it is the only really necessary thing :-) */
88c7664f 446 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
c55e2f49 447 if (n < grehlen+68)
1da177e4 448 return;
c55e2f49 449 n -= grehlen;
1da177e4 450 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
c55e2f49 451 if (n > ntohs(eiph->tot_len))
1da177e4 452 return;
c55e2f49 453 rel_info = htonl(n);
1da177e4
LT
454 break;
455 default:
456 /* All others are translated to HOST_UNREACH.
457 rfc2003 contains "deep thoughts" about NET_UNREACH,
458 I believe, it is just ether pollution. --ANK
459 */
460 rel_type = ICMP_DEST_UNREACH;
461 rel_code = ICMP_HOST_UNREACH;
462 break;
463 }
464 break;
465 case ICMP_TIME_EXCEEDED:
466 if (code != ICMP_EXC_TTL)
467 return;
468 break;
469 }
470
471 /* Prepare fake skb to feed it to icmp_send */
472 skb2 = skb_clone(skb, GFP_ATOMIC);
473 if (skb2 == NULL)
474 return;
475 dst_release(skb2->dst);
476 skb2->dst = NULL;
477 skb_pull(skb2, skb->data - (u8*)eiph);
c1d2bbe1 478 skb_reset_network_header(skb2);
1da177e4
LT
479
480 /* Try to guess incoming interface */
481 memset(&fl, 0, sizeof(fl));
482 fl.fl4_dst = eiph->saddr;
483 fl.fl4_tos = RT_TOS(eiph->tos);
484 fl.proto = IPPROTO_GRE;
f206351a 485 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
486 kfree_skb(skb2);
487 return;
488 }
489 skb2->dev = rt->u.dst.dev;
490
491 /* route "incoming" packet */
492 if (rt->rt_flags&RTCF_LOCAL) {
493 ip_rt_put(rt);
494 rt = NULL;
495 fl.fl4_dst = eiph->daddr;
496 fl.fl4_src = eiph->saddr;
497 fl.fl4_tos = eiph->tos;
f206351a 498 if (ip_route_output_key(&init_net, &rt, &fl) ||
1da177e4
LT
499 rt->u.dst.dev->type != ARPHRD_IPGRE) {
500 ip_rt_put(rt);
501 kfree_skb(skb2);
502 return;
503 }
504 } else {
505 ip_rt_put(rt);
506 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
507 skb2->dst->dev->type != ARPHRD_IPGRE) {
508 kfree_skb(skb2);
509 return;
510 }
511 }
512
513 /* change mtu on this route */
514 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
c55e2f49 515 if (n > dst_mtu(skb2->dst)) {
1da177e4
LT
516 kfree_skb(skb2);
517 return;
518 }
c55e2f49 519 skb2->dst->ops->update_pmtu(skb2->dst, n);
1da177e4 520 } else if (type == ICMP_TIME_EXCEEDED) {
2941a486 521 struct ip_tunnel *t = netdev_priv(skb2->dev);
1da177e4
LT
522 if (t->parms.iph.ttl) {
523 rel_type = ICMP_DEST_UNREACH;
524 rel_code = ICMP_HOST_UNREACH;
525 }
526 }
527
528 icmp_send(skb2, rel_type, rel_code, rel_info);
529 kfree_skb(skb2);
530#endif
531}
532
533static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
534{
535 if (INET_ECN_is_ce(iph->tos)) {
536 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 537 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 538 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 539 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
540 }
541 }
542}
543
544static inline u8
545ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
546{
547 u8 inner = 0;
548 if (skb->protocol == htons(ETH_P_IP))
549 inner = old_iph->tos;
550 else if (skb->protocol == htons(ETH_P_IPV6))
551 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
552 return INET_ECN_encapsulate(tos, inner);
553}
554
555static int ipgre_rcv(struct sk_buff *skb)
556{
557 struct iphdr *iph;
558 u8 *h;
d5a0a1e3 559 __be16 flags;
d3bc23e7 560 __sum16 csum = 0;
d5a0a1e3 561 __be32 key = 0;
1da177e4
LT
562 u32 seqno = 0;
563 struct ip_tunnel *tunnel;
564 int offset = 4;
565
566 if (!pskb_may_pull(skb, 16))
567 goto drop_nolock;
568
eddc9ec5 569 iph = ip_hdr(skb);
1da177e4 570 h = skb->data;
d5a0a1e3 571 flags = *(__be16*)h;
1da177e4
LT
572
573 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
574 /* - Version must be 0.
575 - We do not support routing headers.
576 */
577 if (flags&(GRE_VERSION|GRE_ROUTING))
578 goto drop_nolock;
579
580 if (flags&GRE_CSUM) {
fb286bb2 581 switch (skb->ip_summed) {
84fa7933 582 case CHECKSUM_COMPLETE:
d3bc23e7 583 csum = csum_fold(skb->csum);
fb286bb2
HX
584 if (!csum)
585 break;
586 /* fall through */
587 case CHECKSUM_NONE:
588 skb->csum = 0;
589 csum = __skb_checksum_complete(skb);
84fa7933 590 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
591 }
592 offset += 4;
593 }
594 if (flags&GRE_KEY) {
d5a0a1e3 595 key = *(__be32*)(h + offset);
1da177e4
LT
596 offset += 4;
597 }
598 if (flags&GRE_SEQ) {
d5a0a1e3 599 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
600 offset += 4;
601 }
602 }
603
604 read_lock(&ipgre_lock);
605 if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
606 secpath_reset(skb);
607
d5a0a1e3 608 skb->protocol = *(__be16*)(h + 2);
1da177e4
LT
609 /* WCCP version 1 and 2 protocol decoding.
610 * - Change protocol to IP
611 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
612 */
613 if (flags == 0 &&
496c98df
YH
614 skb->protocol == htons(ETH_P_WCCP)) {
615 skb->protocol = htons(ETH_P_IP);
e905a9ed 616 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
617 offset += 4;
618 }
619
1d069167 620 skb->mac_header = skb->network_header;
4209fb60
ACM
621 __pskb_pull(skb, offset);
622 skb_reset_network_header(skb);
9c70220b 623 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
624 skb->pkt_type = PACKET_HOST;
625#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 626 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 627 /* Looped back packet, drop it! */
ee6b9673 628 if (skb->rtable->fl.iif == 0)
1da177e4
LT
629 goto drop;
630 tunnel->stat.multicast++;
631 skb->pkt_type = PACKET_BROADCAST;
632 }
633#endif
634
635 if (((flags&GRE_CSUM) && csum) ||
636 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
637 tunnel->stat.rx_crc_errors++;
638 tunnel->stat.rx_errors++;
639 goto drop;
640 }
641 if (tunnel->parms.i_flags&GRE_SEQ) {
642 if (!(flags&GRE_SEQ) ||
643 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
644 tunnel->stat.rx_fifo_errors++;
645 tunnel->stat.rx_errors++;
646 goto drop;
647 }
648 tunnel->i_seqno = seqno + 1;
649 }
650 tunnel->stat.rx_packets++;
651 tunnel->stat.rx_bytes += skb->len;
652 skb->dev = tunnel->dev;
653 dst_release(skb->dst);
654 skb->dst = NULL;
655 nf_reset(skb);
656 ipgre_ecn_decapsulate(iph, skb);
657 netif_rx(skb);
658 read_unlock(&ipgre_lock);
659 return(0);
660 }
45af08be 661 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
662
663drop:
664 read_unlock(&ipgre_lock);
665drop_nolock:
666 kfree_skb(skb);
667 return(0);
668}
669
670static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
671{
2941a486 672 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 673 struct net_device_stats *stats = &tunnel->stat;
eddc9ec5 674 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
675 struct iphdr *tiph;
676 u8 tos;
d5a0a1e3 677 __be16 df;
1da177e4
LT
678 struct rtable *rt; /* Route to the other host */
679 struct net_device *tdev; /* Device to other host */
680 struct iphdr *iph; /* Our new IP header */
c2636b4d 681 unsigned int max_headroom; /* The extra header space needed */
1da177e4 682 int gre_hlen;
d5a0a1e3 683 __be32 dst;
1da177e4
LT
684 int mtu;
685
686 if (tunnel->recursion++) {
687 tunnel->stat.collisions++;
688 goto tx_error;
689 }
690
3b04ddde 691 if (dev->header_ops) {
1da177e4
LT
692 gre_hlen = 0;
693 tiph = (struct iphdr*)skb->data;
694 } else {
695 gre_hlen = tunnel->hlen;
696 tiph = &tunnel->parms.iph;
697 }
698
699 if ((dst = tiph->daddr) == 0) {
700 /* NBMA tunnel */
701
702 if (skb->dst == NULL) {
703 tunnel->stat.tx_fifo_errors++;
704 goto tx_error;
705 }
706
707 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 708 rt = skb->rtable;
1da177e4
LT
709 if ((dst = rt->rt_gateway) == 0)
710 goto tx_error_icmp;
711 }
712#ifdef CONFIG_IPV6
713 else if (skb->protocol == htons(ETH_P_IPV6)) {
714 struct in6_addr *addr6;
715 int addr_type;
716 struct neighbour *neigh = skb->dst->neighbour;
717
718 if (neigh == NULL)
719 goto tx_error;
720
721 addr6 = (struct in6_addr*)&neigh->primary_key;
722 addr_type = ipv6_addr_type(addr6);
723
724 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 725 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
726 addr_type = ipv6_addr_type(addr6);
727 }
728
729 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
730 goto tx_error_icmp;
731
732 dst = addr6->s6_addr32[3];
733 }
734#endif
735 else
736 goto tx_error;
737 }
738
739 tos = tiph->tos;
740 if (tos&1) {
741 if (skb->protocol == htons(ETH_P_IP))
742 tos = old_iph->tos;
743 tos &= ~1;
744 }
745
746 {
747 struct flowi fl = { .oif = tunnel->parms.link,
748 .nl_u = { .ip4_u =
749 { .daddr = dst,
750 .saddr = tiph->saddr,
751 .tos = RT_TOS(tos) } },
752 .proto = IPPROTO_GRE };
f206351a 753 if (ip_route_output_key(&init_net, &rt, &fl)) {
1da177e4
LT
754 tunnel->stat.tx_carrier_errors++;
755 goto tx_error;
756 }
757 }
758 tdev = rt->u.dst.dev;
759
760 if (tdev == dev) {
761 ip_rt_put(rt);
762 tunnel->stat.collisions++;
763 goto tx_error;
764 }
765
766 df = tiph->frag_off;
767 if (df)
768 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
769 else
770 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
771
772 if (skb->dst)
773 skb->dst->ops->update_pmtu(skb->dst, mtu);
774
775 if (skb->protocol == htons(ETH_P_IP)) {
776 df |= (old_iph->frag_off&htons(IP_DF));
777
778 if ((old_iph->frag_off&htons(IP_DF)) &&
779 mtu < ntohs(old_iph->tot_len)) {
780 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
781 ip_rt_put(rt);
782 goto tx_error;
783 }
784 }
785#ifdef CONFIG_IPV6
786 else if (skb->protocol == htons(ETH_P_IPV6)) {
787 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
788
789 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
790 if ((tunnel->parms.iph.daddr &&
791 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
792 rt6->rt6i_dst.plen == 128) {
793 rt6->rt6i_flags |= RTF_MODIFIED;
794 skb->dst->metrics[RTAX_MTU-1] = mtu;
795 }
796 }
797
798 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
799 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
800 ip_rt_put(rt);
801 goto tx_error;
802 }
803 }
804#endif
805
806 if (tunnel->err_count > 0) {
807 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
808 tunnel->err_count--;
809
810 dst_link_failure(skb);
811 } else
812 tunnel->err_count = 0;
813 }
814
815 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
816
cfbba49d
PM
817 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
818 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
819 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
820 if (!new_skb) {
821 ip_rt_put(rt);
e905a9ed 822 stats->tx_dropped++;
1da177e4
LT
823 dev_kfree_skb(skb);
824 tunnel->recursion--;
825 return 0;
826 }
827 if (skb->sk)
828 skb_set_owner_w(new_skb, skb->sk);
829 dev_kfree_skb(skb);
830 skb = new_skb;
eddc9ec5 831 old_iph = ip_hdr(skb);
1da177e4
LT
832 }
833
b0e380b1 834 skb->transport_header = skb->network_header;
e2d1bca7
ACM
835 skb_push(skb, gre_hlen);
836 skb_reset_network_header(skb);
1da177e4 837 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
838 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
839 IPSKB_REROUTED);
1da177e4
LT
840 dst_release(skb->dst);
841 skb->dst = &rt->u.dst;
842
843 /*
844 * Push down and install the IPIP header.
845 */
846
eddc9ec5 847 iph = ip_hdr(skb);
1da177e4
LT
848 iph->version = 4;
849 iph->ihl = sizeof(struct iphdr) >> 2;
850 iph->frag_off = df;
851 iph->protocol = IPPROTO_GRE;
852 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
853 iph->daddr = rt->rt_dst;
854 iph->saddr = rt->rt_src;
855
856 if ((iph->ttl = tiph->ttl) == 0) {
857 if (skb->protocol == htons(ETH_P_IP))
858 iph->ttl = old_iph->ttl;
859#ifdef CONFIG_IPV6
860 else if (skb->protocol == htons(ETH_P_IPV6))
861 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
862#endif
863 else
864 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
865 }
866
d5a0a1e3
AV
867 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
868 ((__be16*)(iph+1))[1] = skb->protocol;
1da177e4
LT
869
870 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 871 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
872
873 if (tunnel->parms.o_flags&GRE_SEQ) {
874 ++tunnel->o_seqno;
875 *ptr = htonl(tunnel->o_seqno);
876 ptr--;
877 }
878 if (tunnel->parms.o_flags&GRE_KEY) {
879 *ptr = tunnel->parms.o_key;
880 ptr--;
881 }
882 if (tunnel->parms.o_flags&GRE_CSUM) {
883 *ptr = 0;
5f92a738 884 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
885 }
886 }
887
888 nf_reset(skb);
889
890 IPTUNNEL_XMIT();
891 tunnel->recursion--;
892 return 0;
893
894tx_error_icmp:
895 dst_link_failure(skb);
896
897tx_error:
898 stats->tx_errors++;
899 dev_kfree_skb(skb);
900 tunnel->recursion--;
901 return 0;
902}
903
ee34c1eb
MS
904static void ipgre_tunnel_bind_dev(struct net_device *dev)
905{
906 struct net_device *tdev = NULL;
907 struct ip_tunnel *tunnel;
908 struct iphdr *iph;
909 int hlen = LL_MAX_HEADER;
910 int mtu = ETH_DATA_LEN;
911 int addend = sizeof(struct iphdr) + 4;
912
913 tunnel = netdev_priv(dev);
914 iph = &tunnel->parms.iph;
915
916 /* Guess output device to choose reasonable mtu and hard_header_len */
917
918 if (iph->daddr) {
919 struct flowi fl = { .oif = tunnel->parms.link,
920 .nl_u = { .ip4_u =
921 { .daddr = iph->daddr,
922 .saddr = iph->saddr,
923 .tos = RT_TOS(iph->tos) } },
924 .proto = IPPROTO_GRE };
925 struct rtable *rt;
f206351a 926 if (!ip_route_output_key(&init_net, &rt, &fl)) {
ee34c1eb
MS
927 tdev = rt->u.dst.dev;
928 ip_rt_put(rt);
929 }
930 dev->flags |= IFF_POINTOPOINT;
931 }
932
933 if (!tdev && tunnel->parms.link)
934 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
935
936 if (tdev) {
937 hlen = tdev->hard_header_len;
938 mtu = tdev->mtu;
939 }
940 dev->iflink = tunnel->parms.link;
941
942 /* Precalculate GRE options length */
943 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
944 if (tunnel->parms.o_flags&GRE_CSUM)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_KEY)
947 addend += 4;
948 if (tunnel->parms.o_flags&GRE_SEQ)
949 addend += 4;
950 }
951 dev->hard_header_len = hlen + addend;
952 dev->mtu = mtu - addend;
953 tunnel->hlen = addend;
954
955}
956
1da177e4
LT
957static int
958ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
959{
960 int err = 0;
961 struct ip_tunnel_parm p;
962 struct ip_tunnel *t;
963
964 switch (cmd) {
965 case SIOCGETTUNNEL:
966 t = NULL;
967 if (dev == ipgre_fb_tunnel_dev) {
968 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
969 err = -EFAULT;
970 break;
971 }
972 t = ipgre_tunnel_locate(&p, 0);
973 }
974 if (t == NULL)
2941a486 975 t = netdev_priv(dev);
1da177e4
LT
976 memcpy(&p, &t->parms, sizeof(p));
977 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
978 err = -EFAULT;
979 break;
980
981 case SIOCADDTUNNEL:
982 case SIOCCHGTUNNEL:
983 err = -EPERM;
984 if (!capable(CAP_NET_ADMIN))
985 goto done;
986
987 err = -EFAULT;
988 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
989 goto done;
990
991 err = -EINVAL;
992 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
993 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
994 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
995 goto done;
996 if (p.iph.ttl)
997 p.iph.frag_off |= htons(IP_DF);
998
999 if (!(p.i_flags&GRE_KEY))
1000 p.i_key = 0;
1001 if (!(p.o_flags&GRE_KEY))
1002 p.o_key = 0;
1003
1004 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
1005
1006 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1007 if (t != NULL) {
1008 if (t->dev != dev) {
1009 err = -EEXIST;
1010 break;
1011 }
1012 } else {
1013 unsigned nflags=0;
1014
2941a486 1015 t = netdev_priv(dev);
1da177e4 1016
f97c1e0c 1017 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1018 nflags = IFF_BROADCAST;
1019 else if (p.iph.daddr)
1020 nflags = IFF_POINTOPOINT;
1021
1022 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1023 err = -EINVAL;
1024 break;
1025 }
1026 ipgre_tunnel_unlink(t);
1027 t->parms.iph.saddr = p.iph.saddr;
1028 t->parms.iph.daddr = p.iph.daddr;
1029 t->parms.i_key = p.i_key;
1030 t->parms.o_key = p.o_key;
1031 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1032 memcpy(dev->broadcast, &p.iph.daddr, 4);
1033 ipgre_tunnel_link(t);
1034 netdev_state_change(dev);
1035 }
1036 }
1037
1038 if (t) {
1039 err = 0;
1040 if (cmd == SIOCCHGTUNNEL) {
1041 t->parms.iph.ttl = p.iph.ttl;
1042 t->parms.iph.tos = p.iph.tos;
1043 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1044 if (t->parms.link != p.link) {
1045 t->parms.link = p.link;
1046 ipgre_tunnel_bind_dev(dev);
1047 netdev_state_change(dev);
1048 }
1da177e4
LT
1049 }
1050 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1051 err = -EFAULT;
1052 } else
1053 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1054 break;
1055
1056 case SIOCDELTUNNEL:
1057 err = -EPERM;
1058 if (!capable(CAP_NET_ADMIN))
1059 goto done;
1060
1061 if (dev == ipgre_fb_tunnel_dev) {
1062 err = -EFAULT;
1063 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1064 goto done;
1065 err = -ENOENT;
1066 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1067 goto done;
1068 err = -EPERM;
2941a486 1069 if (t == netdev_priv(ipgre_fb_tunnel_dev))
1da177e4
LT
1070 goto done;
1071 dev = t->dev;
1072 }
22f8cde5
SH
1073 unregister_netdevice(dev);
1074 err = 0;
1da177e4
LT
1075 break;
1076
1077 default:
1078 err = -EINVAL;
1079 }
1080
1081done:
1082 return err;
1083}
1084
1085static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1086{
2941a486 1087 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1da177e4
LT
1088}
1089
1090static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1091{
2941a486 1092 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
1093 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1094 return -EINVAL;
1095 dev->mtu = new_mtu;
1096 return 0;
1097}
1098
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1127
3b04ddde
SH
1128static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1129 unsigned short type,
1130 const void *daddr, const void *saddr, unsigned len)
1da177e4 1131{
2941a486 1132 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1133 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1134 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1135
1136 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1137 p[0] = t->parms.o_flags;
1138 p[1] = htons(type);
1139
1140 /*
e905a9ed 1141 * Set the source hardware address.
1da177e4 1142 */
e905a9ed 1143
1da177e4
LT
1144 if (saddr)
1145 memcpy(&iph->saddr, saddr, 4);
1146
1147 if (daddr) {
1148 memcpy(&iph->daddr, daddr, 4);
1149 return t->hlen;
1150 }
f97c1e0c 1151 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1152 return t->hlen;
e905a9ed 1153
1da177e4
LT
1154 return -t->hlen;
1155}
1156
6a5f44d7
TT
1157static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1158{
1159 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1160 memcpy(haddr, &iph->saddr, 4);
1161 return 4;
1162}
1163
3b04ddde
SH
1164static const struct header_ops ipgre_header_ops = {
1165 .create = ipgre_header,
6a5f44d7 1166 .parse = ipgre_header_parse,
3b04ddde
SH
1167};
1168
6a5f44d7 1169#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1170static int ipgre_open(struct net_device *dev)
1171{
2941a486 1172 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1173
f97c1e0c 1174 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1175 struct flowi fl = { .oif = t->parms.link,
1176 .nl_u = { .ip4_u =
1177 { .daddr = t->parms.iph.daddr,
1178 .saddr = t->parms.iph.saddr,
1179 .tos = RT_TOS(t->parms.iph.tos) } },
1180 .proto = IPPROTO_GRE };
1181 struct rtable *rt;
f206351a 1182 if (ip_route_output_key(&init_net, &rt, &fl))
1da177e4
LT
1183 return -EADDRNOTAVAIL;
1184 dev = rt->u.dst.dev;
1185 ip_rt_put(rt);
e5ed6399 1186 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1187 return -EADDRNOTAVAIL;
1188 t->mlink = dev->ifindex;
e5ed6399 1189 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1190 }
1191 return 0;
1192}
1193
1194static int ipgre_close(struct net_device *dev)
1195{
2941a486 1196 struct ip_tunnel *t = netdev_priv(dev);
f97c1e0c 1197 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1198 struct in_device *in_dev;
c346dca1 1199 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1200 if (in_dev) {
1201 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1202 in_dev_put(in_dev);
1203 }
1204 }
1205 return 0;
1206}
1207
1208#endif
1209
1210static void ipgre_tunnel_setup(struct net_device *dev)
1211{
1da177e4
LT
1212 dev->uninit = ipgre_tunnel_uninit;
1213 dev->destructor = free_netdev;
1214 dev->hard_start_xmit = ipgre_tunnel_xmit;
1215 dev->get_stats = ipgre_tunnel_get_stats;
1216 dev->do_ioctl = ipgre_tunnel_ioctl;
1217 dev->change_mtu = ipgre_tunnel_change_mtu;
1218
1219 dev->type = ARPHRD_IPGRE;
1220 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1221 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1222 dev->flags = IFF_NOARP;
1223 dev->iflink = 0;
1224 dev->addr_len = 4;
1225}
1226
1227static int ipgre_tunnel_init(struct net_device *dev)
1228{
1da177e4
LT
1229 struct ip_tunnel *tunnel;
1230 struct iphdr *iph;
1da177e4 1231
2941a486 1232 tunnel = netdev_priv(dev);
1da177e4
LT
1233 iph = &tunnel->parms.iph;
1234
1235 tunnel->dev = dev;
1236 strcpy(tunnel->parms.name, dev->name);
1237
1238 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1239 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1240
ee34c1eb 1241 ipgre_tunnel_bind_dev(dev);
1da177e4
LT
1242
1243 if (iph->daddr) {
1da177e4 1244#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1245 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1246 if (!iph->saddr)
1247 return -EINVAL;
1248 dev->flags = IFF_BROADCAST;
3b04ddde 1249 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1250 dev->open = ipgre_open;
1251 dev->stop = ipgre_close;
1252 }
1253#endif
ee34c1eb 1254 } else
6a5f44d7 1255 dev->header_ops = &ipgre_header_ops;
1da177e4 1256
1da177e4
LT
1257 return 0;
1258}
1259
4b30b1c6 1260static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1261{
2941a486 1262 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4
LT
1263 struct iphdr *iph = &tunnel->parms.iph;
1264
1265 tunnel->dev = dev;
1266 strcpy(tunnel->parms.name, dev->name);
1267
1268 iph->version = 4;
1269 iph->protocol = IPPROTO_GRE;
1270 iph->ihl = 5;
1271 tunnel->hlen = sizeof(struct iphdr) + 4;
1272
1273 dev_hold(dev);
1274 tunnels_wc[0] = tunnel;
1275 return 0;
1276}
1277
1278
1279static struct net_protocol ipgre_protocol = {
1280 .handler = ipgre_rcv,
1281 .err_handler = ipgre_err,
1282};
1283
59a4c759
PE
1284static int ipgre_init_net(struct net *net)
1285{
1286 int err;
1287 struct ipgre_net *ign;
1288
1289 err = -ENOMEM;
1290 ign = kmalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1291 if (ign == NULL)
1292 goto err_alloc;
1293
1294 err = net_assign_generic(net, ipgre_net_id, ign);
1295 if (err < 0)
1296 goto err_assign;
1297
1298 return 0;
1299
1300err_assign:
1301 kfree(ign);
1302err_alloc:
1303 return err;
1304}
1305
1306static void ipgre_exit_net(struct net *net)
1307{
1308 struct ipgre_net *ign;
1309
1310 ign = net_generic(net, ipgre_net_id);
1311 kfree(ign);
1312}
1313
1314static struct pernet_operations ipgre_net_ops = {
1315 .init = ipgre_init_net,
1316 .exit = ipgre_exit_net,
1317};
1da177e4
LT
1318
1319/*
1320 * And now the modules code and kernel interface.
1321 */
1322
1323static int __init ipgre_init(void)
1324{
1325 int err;
1326
1327 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1328
1329 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1330 printk(KERN_INFO "ipgre init: can't add protocol\n");
1331 return -EAGAIN;
1332 }
1333
1334 ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1335 ipgre_tunnel_setup);
1336 if (!ipgre_fb_tunnel_dev) {
1337 err = -ENOMEM;
1338 goto err1;
1339 }
1340
1341 ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1342
1343 if ((err = register_netdev(ipgre_fb_tunnel_dev)))
1344 goto err2;
59a4c759
PE
1345
1346 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1347 if (err < 0)
1348 goto err3;
1da177e4
LT
1349out:
1350 return err;
1351err2:
1352 free_netdev(ipgre_fb_tunnel_dev);
1353err1:
1354 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1355 goto out;
59a4c759
PE
1356err3:
1357 unregister_netdevice(ipgre_fb_tunnel_dev);
1358 goto err1;
1da177e4
LT
1359}
1360
db44575f
AK
1361static void __exit ipgre_destroy_tunnels(void)
1362{
1363 int prio;
1364
1365 for (prio = 0; prio < 4; prio++) {
1366 int h;
1367 for (h = 0; h < HASH_SIZE; h++) {
1368 struct ip_tunnel *t;
1369 while ((t = tunnels[prio][h]) != NULL)
1370 unregister_netdevice(t->dev);
1371 }
1372 }
1373}
1374
1375static void __exit ipgre_fini(void)
1da177e4
LT
1376{
1377 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1378 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1379
db44575f
AK
1380 rtnl_lock();
1381 ipgre_destroy_tunnels();
1382 rtnl_unlock();
59a4c759
PE
1383
1384 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1385}
1386
1387module_init(ipgre_init);
1388module_exit(ipgre_fini);
1389MODULE_LICENSE("GPL");