]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
sky2: set carrier off in probe
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
68
a43912ab 69 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
70
71
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all.
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 quickly degrades to a value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
107
108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
115
116 Alexey Kuznetsov.
117 */
118
c19e654d 119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 122static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
123
124/* Fallback tunnel: no source, no destination, no key, no options */
125
eb8ce741
PE
126#define HASH_SIZE 16
127
59a4c759
PE
128static int ipgre_net_id;
129struct ipgre_net {
eb8ce741
PE
130 struct ip_tunnel *tunnels[4][HASH_SIZE];
131
7daa0004 132 struct net_device *fb_tunnel_dev;
59a4c759
PE
133};
134
1da177e4
LT
135/* Tunnel hash table */
136
137/*
138 4 hash tables:
139
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
144
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
148
149 All keyless packets, if not matched by configured keyless tunnels,
150 will match fallback tunnel.
151 */
152
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is parenthesized so that compound expressions are cast as a
 * whole, and the mask is derived from HASH_SIZE (which must stay a power of
 * two) instead of repeating the literal 0xF.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1da177e4 154
eb8ce741
PE
155#define tunnels_r_l tunnels[3]
156#define tunnels_r tunnels[2]
157#define tunnels_l tunnels[1]
158#define tunnels_wc tunnels[0]
1da177e4
LT
159
160static DEFINE_RWLOCK(ipgre_lock);
161
162/* Given src, dst and key, find appropriate for input tunnel. */
163
749c10f9 164static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
165 __be32 remote, __be32 local,
166 __be32 key, __be16 gre_proto)
1da177e4 167{
749c10f9
TT
168 struct net *net = dev_net(dev);
169 int link = dev->ifindex;
1da177e4
LT
170 unsigned h0 = HASH(remote);
171 unsigned h1 = HASH(key);
afcf1242 172 struct ip_tunnel *t, *cand = NULL;
7daa0004 173 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
174 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
175 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 176 int score, cand_score = 4;
1da177e4 177
eb8ce741 178 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
749c10f9
TT
179 if (local != t->parms.iph.saddr ||
180 remote != t->parms.iph.daddr ||
181 key != t->parms.i_key ||
182 !(t->dev->flags & IFF_UP))
183 continue;
184
185 if (t->dev->type != ARPHRD_IPGRE &&
186 t->dev->type != dev_type)
187 continue;
188
afcf1242 189 score = 0;
749c10f9 190 if (t->parms.link != link)
afcf1242 191 score |= 1;
749c10f9 192 if (t->dev->type != dev_type)
afcf1242
TT
193 score |= 2;
194 if (score == 0)
749c10f9 195 return t;
afcf1242
TT
196
197 if (score < cand_score) {
198 cand = t;
199 cand_score = score;
200 }
1da177e4 201 }
e1a80002 202
eb8ce741 203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
749c10f9
TT
204 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP))
207 continue;
208
209 if (t->dev->type != ARPHRD_IPGRE &&
210 t->dev->type != dev_type)
211 continue;
212
afcf1242 213 score = 0;
749c10f9 214 if (t->parms.link != link)
afcf1242 215 score |= 1;
749c10f9 216 if (t->dev->type != dev_type)
afcf1242
TT
217 score |= 2;
218 if (score == 0)
749c10f9 219 return t;
afcf1242
TT
220
221 if (score < cand_score) {
222 cand = t;
223 cand_score = score;
224 }
1da177e4 225 }
e1a80002 226
eb8ce741 227 for (t = ign->tunnels_l[h1]; t; t = t->next) {
749c10f9
TT
228 if ((local != t->parms.iph.saddr &&
229 (local != t->parms.iph.daddr ||
230 !ipv4_is_multicast(local))) ||
231 key != t->parms.i_key ||
232 !(t->dev->flags & IFF_UP))
233 continue;
234
235 if (t->dev->type != ARPHRD_IPGRE &&
236 t->dev->type != dev_type)
237 continue;
238
afcf1242 239 score = 0;
749c10f9 240 if (t->parms.link != link)
afcf1242 241 score |= 1;
749c10f9 242 if (t->dev->type != dev_type)
afcf1242
TT
243 score |= 2;
244 if (score == 0)
749c10f9 245 return t;
afcf1242
TT
246
247 if (score < cand_score) {
248 cand = t;
249 cand_score = score;
250 }
1da177e4 251 }
e1a80002 252
eb8ce741 253 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
749c10f9
TT
254 if (t->parms.i_key != key ||
255 !(t->dev->flags & IFF_UP))
256 continue;
257
258 if (t->dev->type != ARPHRD_IPGRE &&
259 t->dev->type != dev_type)
260 continue;
261
afcf1242 262 score = 0;
749c10f9 263 if (t->parms.link != link)
afcf1242 264 score |= 1;
749c10f9 265 if (t->dev->type != dev_type)
afcf1242
TT
266 score |= 2;
267 if (score == 0)
749c10f9 268 return t;
afcf1242
TT
269
270 if (score < cand_score) {
271 cand = t;
272 cand_score = score;
273 }
1da177e4
LT
274 }
275
afcf1242
TT
276 if (cand != NULL)
277 return cand;
e1a80002 278
749c10f9 279 if (ign->fb_tunnel_dev->flags & IFF_UP)
7daa0004 280 return netdev_priv(ign->fb_tunnel_dev);
749c10f9 281
1da177e4
LT
282 return NULL;
283}
284
f57e7d5a
PE
285static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
286 struct ip_tunnel_parm *parms)
1da177e4 287{
5056a1ef
YH
288 __be32 remote = parms->iph.daddr;
289 __be32 local = parms->iph.saddr;
290 __be32 key = parms->i_key;
1da177e4
LT
291 unsigned h = HASH(key);
292 int prio = 0;
293
294 if (local)
295 prio |= 1;
f97c1e0c 296 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
297 prio |= 2;
298 h ^= HASH(remote);
299 }
300
eb8ce741 301 return &ign->tunnels[prio][h];
1da177e4
LT
302}
303
f57e7d5a
PE
304static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
305 struct ip_tunnel *t)
5056a1ef 306{
f57e7d5a 307 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
308}
309
f57e7d5a 310static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 311{
f57e7d5a 312 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
313
314 t->next = *tp;
315 write_lock_bh(&ipgre_lock);
316 *tp = t;
317 write_unlock_bh(&ipgre_lock);
318}
319
f57e7d5a 320static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
321{
322 struct ip_tunnel **tp;
323
f57e7d5a 324 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
325 if (t == *tp) {
326 write_lock_bh(&ipgre_lock);
327 *tp = t->next;
328 write_unlock_bh(&ipgre_lock);
329 break;
330 }
331 }
332}
333
e1a80002
HX
334static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
335 struct ip_tunnel_parm *parms,
336 int type)
1da177e4 337{
d5a0a1e3
AV
338 __be32 remote = parms->iph.daddr;
339 __be32 local = parms->iph.saddr;
340 __be32 key = parms->i_key;
749c10f9 341 int link = parms->link;
e1a80002
HX
342 struct ip_tunnel *t, **tp;
343 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
344
345 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
346 if (local == t->parms.iph.saddr &&
347 remote == t->parms.iph.daddr &&
348 key == t->parms.i_key &&
749c10f9 349 link == t->parms.link &&
e1a80002
HX
350 type == t->dev->type)
351 break;
352
353 return t;
354}
355
356static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
357 struct ip_tunnel_parm *parms, int create)
358{
359 struct ip_tunnel *t, *nt;
1da177e4 360 struct net_device *dev;
1da177e4 361 char name[IFNAMSIZ];
f57e7d5a 362 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 363
e1a80002
HX
364 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
365 if (t || !create)
366 return t;
1da177e4
LT
367
368 if (parms->name[0])
369 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
370 else
371 sprintf(name, "gre%%d");
1da177e4
LT
372
373 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
374 if (!dev)
375 return NULL;
376
0b67eceb
PE
377 dev_net_set(dev, net);
378
b37d428b
PE
379 if (strchr(name, '%')) {
380 if (dev_alloc_name(dev, name) < 0)
381 goto failed_free;
382 }
383
2941a486 384 nt = netdev_priv(dev);
1da177e4 385 nt->parms = *parms;
c19e654d 386 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 387
42aa9162
HX
388 dev->mtu = ipgre_tunnel_bind_dev(dev);
389
b37d428b
PE
390 if (register_netdevice(dev) < 0)
391 goto failed_free;
1da177e4 392
1da177e4 393 dev_hold(dev);
f57e7d5a 394 ipgre_tunnel_link(ign, nt);
1da177e4
LT
395 return nt;
396
b37d428b
PE
397failed_free:
398 free_netdev(dev);
1da177e4
LT
399 return NULL;
400}
401
402static void ipgre_tunnel_uninit(struct net_device *dev)
403{
f57e7d5a
PE
404 struct net *net = dev_net(dev);
405 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
406
407 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
408 dev_put(dev);
409}
410
411
412static void ipgre_err(struct sk_buff *skb, u32 info)
413{
1da177e4 414
071f92d0 415/* All the routers (except for Linux) return only
1da177e4
LT
416 8 bytes of packet payload. It means, that precise relaying of
417 ICMP in the real Internet is absolutely infeasible.
418
419 Moreover, Cisco "wise men" put GRE key to the third word
420 in GRE header. It makes impossible maintaining even soft state for keyed
421 GRE tunnels with enabled checksum. Tell them "thank you".
422
423 Well, I wonder, rfc1812 was written by Cisco employee,
424 what the hell these idiots break standrads established
425 by themself???
426 */
427
6ed2533e 428 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 429 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 430 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
431 const int type = icmp_hdr(skb)->type;
432 const int code = icmp_hdr(skb)->code;
1da177e4 433 struct ip_tunnel *t;
d5a0a1e3 434 __be16 flags;
1da177e4
LT
435
436 flags = p[0];
437 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
438 if (flags&(GRE_VERSION|GRE_ROUTING))
439 return;
440 if (flags&GRE_KEY) {
441 grehlen += 4;
442 if (flags&GRE_CSUM)
443 grehlen += 4;
444 }
445 }
446
447 /* If only 8 bytes returned, keyed message will be dropped here */
448 if (skb_headlen(skb) < grehlen)
449 return;
450
451 switch (type) {
452 default:
453 case ICMP_PARAMETERPROB:
454 return;
455
456 case ICMP_DEST_UNREACH:
457 switch (code) {
458 case ICMP_SR_FAILED:
459 case ICMP_PORT_UNREACH:
460 /* Impossible event. */
461 return;
462 case ICMP_FRAG_NEEDED:
463 /* Soft state for pmtu is maintained by IP core. */
464 return;
465 default:
466 /* All others are translated to HOST_UNREACH.
467 rfc2003 contains "deep thoughts" about NET_UNREACH,
468 I believe they are just ether pollution. --ANK
469 */
470 break;
471 }
472 break;
473 case ICMP_TIME_EXCEEDED:
474 if (code != ICMP_EXC_TTL)
475 return;
476 break;
477 }
478
479 read_lock(&ipgre_lock);
749c10f9 480 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
481 flags & GRE_KEY ?
482 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
483 p[1]);
f97c1e0c
JP
484 if (t == NULL || t->parms.iph.daddr == 0 ||
485 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
486 goto out;
487
488 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
489 goto out;
490
da6185d8 491 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
492 t->err_count++;
493 else
494 t->err_count = 1;
495 t->err_time = jiffies;
496out:
497 read_unlock(&ipgre_lock);
498 return;
1da177e4
LT
499}
500
501static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
502{
503 if (INET_ECN_is_ce(iph->tos)) {
504 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 505 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 506 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 507 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
508 }
509 }
510}
511
512static inline u8
513ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
514{
515 u8 inner = 0;
516 if (skb->protocol == htons(ETH_P_IP))
517 inner = old_iph->tos;
518 else if (skb->protocol == htons(ETH_P_IPV6))
519 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
520 return INET_ECN_encapsulate(tos, inner);
521}
522
523static int ipgre_rcv(struct sk_buff *skb)
524{
525 struct iphdr *iph;
526 u8 *h;
d5a0a1e3 527 __be16 flags;
d3bc23e7 528 __sum16 csum = 0;
d5a0a1e3 529 __be32 key = 0;
1da177e4
LT
530 u32 seqno = 0;
531 struct ip_tunnel *tunnel;
532 int offset = 4;
e1a80002 533 __be16 gre_proto;
64194c31 534 unsigned int len;
1da177e4
LT
535
536 if (!pskb_may_pull(skb, 16))
537 goto drop_nolock;
538
eddc9ec5 539 iph = ip_hdr(skb);
1da177e4 540 h = skb->data;
d5a0a1e3 541 flags = *(__be16*)h;
1da177e4
LT
542
543 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
544 /* - Version must be 0.
545 - We do not support routing headers.
546 */
547 if (flags&(GRE_VERSION|GRE_ROUTING))
548 goto drop_nolock;
549
550 if (flags&GRE_CSUM) {
fb286bb2 551 switch (skb->ip_summed) {
84fa7933 552 case CHECKSUM_COMPLETE:
d3bc23e7 553 csum = csum_fold(skb->csum);
fb286bb2
HX
554 if (!csum)
555 break;
556 /* fall through */
557 case CHECKSUM_NONE:
558 skb->csum = 0;
559 csum = __skb_checksum_complete(skb);
84fa7933 560 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
561 }
562 offset += 4;
563 }
564 if (flags&GRE_KEY) {
d5a0a1e3 565 key = *(__be32*)(h + offset);
1da177e4
LT
566 offset += 4;
567 }
568 if (flags&GRE_SEQ) {
d5a0a1e3 569 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
570 offset += 4;
571 }
572 }
573
e1a80002
HX
574 gre_proto = *(__be16 *)(h + 2);
575
1da177e4 576 read_lock(&ipgre_lock);
749c10f9 577 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
578 iph->saddr, iph->daddr, key,
579 gre_proto))) {
addd68eb
PE
580 struct net_device_stats *stats = &tunnel->dev->stats;
581
1da177e4
LT
582 secpath_reset(skb);
583
e1a80002 584 skb->protocol = gre_proto;
1da177e4
LT
585 /* WCCP version 1 and 2 protocol decoding.
586 * - Change protocol to IP
587 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
588 */
e1a80002 589 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 590 skb->protocol = htons(ETH_P_IP);
e905a9ed 591 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
592 offset += 4;
593 }
594
1d069167 595 skb->mac_header = skb->network_header;
4209fb60 596 __pskb_pull(skb, offset);
9c70220b 597 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
598 skb->pkt_type = PACKET_HOST;
599#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 600 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 601 /* Looped back packet, drop it! */
511c3f92 602 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 603 goto drop;
addd68eb 604 stats->multicast++;
1da177e4
LT
605 skb->pkt_type = PACKET_BROADCAST;
606 }
607#endif
608
609 if (((flags&GRE_CSUM) && csum) ||
610 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
611 stats->rx_crc_errors++;
612 stats->rx_errors++;
1da177e4
LT
613 goto drop;
614 }
615 if (tunnel->parms.i_flags&GRE_SEQ) {
616 if (!(flags&GRE_SEQ) ||
617 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
618 stats->rx_fifo_errors++;
619 stats->rx_errors++;
1da177e4
LT
620 goto drop;
621 }
622 tunnel->i_seqno = seqno + 1;
623 }
e1a80002 624
64194c31
HX
625 len = skb->len;
626
e1a80002
HX
627 /* Warning: All skb pointers will be invalidated! */
628 if (tunnel->dev->type == ARPHRD_ETHER) {
629 if (!pskb_may_pull(skb, ETH_HLEN)) {
630 stats->rx_length_errors++;
631 stats->rx_errors++;
632 goto drop;
633 }
634
635 iph = ip_hdr(skb);
636 skb->protocol = eth_type_trans(skb, tunnel->dev);
637 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
638 }
639
addd68eb 640 stats->rx_packets++;
64194c31 641 stats->rx_bytes += len;
1da177e4 642 skb->dev = tunnel->dev;
adf30907 643 skb_dst_drop(skb);
1da177e4 644 nf_reset(skb);
e1a80002
HX
645
646 skb_reset_network_header(skb);
1da177e4 647 ipgre_ecn_decapsulate(iph, skb);
e1a80002 648
1da177e4
LT
649 netif_rx(skb);
650 read_unlock(&ipgre_lock);
651 return(0);
652 }
45af08be 653 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
654
655drop:
656 read_unlock(&ipgre_lock);
657drop_nolock:
658 kfree_skb(skb);
659 return(0);
660}
661
6fef4c0c 662static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 663{
2941a486 664 struct ip_tunnel *tunnel = netdev_priv(dev);
addd68eb 665 struct net_device_stats *stats = &tunnel->dev->stats;
eddc9ec5 666 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
667 struct iphdr *tiph;
668 u8 tos;
d5a0a1e3 669 __be16 df;
1da177e4
LT
670 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */
c2636b4d 673 unsigned int max_headroom; /* The extra header space needed */
1da177e4 674 int gre_hlen;
d5a0a1e3 675 __be32 dst;
1da177e4
LT
676 int mtu;
677
e1a80002
HX
678 if (dev->type == ARPHRD_ETHER)
679 IPCB(skb)->flags = 0;
680
681 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 682 gre_hlen = 0;
6ed2533e 683 tiph = (struct iphdr *)skb->data;
1da177e4
LT
684 } else {
685 gre_hlen = tunnel->hlen;
686 tiph = &tunnel->parms.iph;
687 }
688
689 if ((dst = tiph->daddr) == 0) {
690 /* NBMA tunnel */
691
adf30907 692 if (skb_dst(skb) == NULL) {
addd68eb 693 stats->tx_fifo_errors++;
1da177e4
LT
694 goto tx_error;
695 }
696
697 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 698 rt = skb_rtable(skb);
1da177e4
LT
699 if ((dst = rt->rt_gateway) == 0)
700 goto tx_error_icmp;
701 }
702#ifdef CONFIG_IPV6
703 else if (skb->protocol == htons(ETH_P_IPV6)) {
704 struct in6_addr *addr6;
705 int addr_type;
adf30907 706 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
707
708 if (neigh == NULL)
709 goto tx_error;
710
6ed2533e 711 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
712 addr_type = ipv6_addr_type(addr6);
713
714 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 715 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
716 addr_type = ipv6_addr_type(addr6);
717 }
718
719 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
720 goto tx_error_icmp;
721
722 dst = addr6->s6_addr32[3];
723 }
724#endif
725 else
726 goto tx_error;
727 }
728
729 tos = tiph->tos;
ee686ca9
AJ
730 if (tos == 1) {
731 tos = 0;
1da177e4
LT
732 if (skb->protocol == htons(ETH_P_IP))
733 tos = old_iph->tos;
1da177e4
LT
734 }
735
736 {
737 struct flowi fl = { .oif = tunnel->parms.link,
738 .nl_u = { .ip4_u =
739 { .daddr = dst,
740 .saddr = tiph->saddr,
741 .tos = RT_TOS(tos) } },
742 .proto = IPPROTO_GRE };
96635522 743 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 744 stats->tx_carrier_errors++;
1da177e4
LT
745 goto tx_error;
746 }
747 }
748 tdev = rt->u.dst.dev;
749
750 if (tdev == dev) {
751 ip_rt_put(rt);
addd68eb 752 stats->collisions++;
1da177e4
LT
753 goto tx_error;
754 }
755
756 df = tiph->frag_off;
757 if (df)
c95b819a 758 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 759 else
adf30907 760 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 761
adf30907
ED
762 if (skb_dst(skb))
763 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
764
765 if (skb->protocol == htons(ETH_P_IP)) {
766 df |= (old_iph->frag_off&htons(IP_DF));
767
768 if ((old_iph->frag_off&htons(IP_DF)) &&
769 mtu < ntohs(old_iph->tot_len)) {
770 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
771 ip_rt_put(rt);
772 goto tx_error;
773 }
774 }
775#ifdef CONFIG_IPV6
776 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 777 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 778
adf30907 779 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
780 if ((tunnel->parms.iph.daddr &&
781 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
782 rt6->rt6i_dst.plen == 128) {
783 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 784 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
785 }
786 }
787
788 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
789 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
790 ip_rt_put(rt);
791 goto tx_error;
792 }
793 }
794#endif
795
796 if (tunnel->err_count > 0) {
da6185d8
WY
797 if (time_before(jiffies,
798 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
799 tunnel->err_count--;
800
801 dst_link_failure(skb);
802 } else
803 tunnel->err_count = 0;
804 }
805
806 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
807
cfbba49d
PM
808 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
809 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
810 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
811 if (!new_skb) {
812 ip_rt_put(rt);
e905a9ed 813 stats->tx_dropped++;
1da177e4 814 dev_kfree_skb(skb);
6ed10654 815 return NETDEV_TX_OK;
1da177e4
LT
816 }
817 if (skb->sk)
818 skb_set_owner_w(new_skb, skb->sk);
819 dev_kfree_skb(skb);
820 skb = new_skb;
eddc9ec5 821 old_iph = ip_hdr(skb);
1da177e4
LT
822 }
823
64194c31 824 skb_reset_transport_header(skb);
e2d1bca7
ACM
825 skb_push(skb, gre_hlen);
826 skb_reset_network_header(skb);
1da177e4 827 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
828 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
829 IPSKB_REROUTED);
adf30907
ED
830 skb_dst_drop(skb);
831 skb_dst_set(skb, &rt->u.dst);
1da177e4
LT
832
833 /*
834 * Push down and install the IPIP header.
835 */
836
eddc9ec5 837 iph = ip_hdr(skb);
1da177e4
LT
838 iph->version = 4;
839 iph->ihl = sizeof(struct iphdr) >> 2;
840 iph->frag_off = df;
841 iph->protocol = IPPROTO_GRE;
842 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
843 iph->daddr = rt->rt_dst;
844 iph->saddr = rt->rt_src;
845
846 if ((iph->ttl = tiph->ttl) == 0) {
847 if (skb->protocol == htons(ETH_P_IP))
848 iph->ttl = old_iph->ttl;
849#ifdef CONFIG_IPV6
850 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 851 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
852#endif
853 else
854 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
855 }
856
e1a80002
HX
857 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
858 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
859 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
860
861 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 862 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
863
864 if (tunnel->parms.o_flags&GRE_SEQ) {
865 ++tunnel->o_seqno;
866 *ptr = htonl(tunnel->o_seqno);
867 ptr--;
868 }
869 if (tunnel->parms.o_flags&GRE_KEY) {
870 *ptr = tunnel->parms.o_key;
871 ptr--;
872 }
873 if (tunnel->parms.o_flags&GRE_CSUM) {
874 *ptr = 0;
5f92a738 875 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
876 }
877 }
878
879 nf_reset(skb);
880
881 IPTUNNEL_XMIT();
6ed10654 882 return NETDEV_TX_OK;
1da177e4
LT
883
884tx_error_icmp:
885 dst_link_failure(skb);
886
887tx_error:
888 stats->tx_errors++;
889 dev_kfree_skb(skb);
6ed10654 890 return NETDEV_TX_OK;
1da177e4
LT
891}
892
42aa9162 893static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
894{
895 struct net_device *tdev = NULL;
896 struct ip_tunnel *tunnel;
897 struct iphdr *iph;
898 int hlen = LL_MAX_HEADER;
899 int mtu = ETH_DATA_LEN;
900 int addend = sizeof(struct iphdr) + 4;
901
902 tunnel = netdev_priv(dev);
903 iph = &tunnel->parms.iph;
904
c95b819a 905 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
906
907 if (iph->daddr) {
908 struct flowi fl = { .oif = tunnel->parms.link,
909 .nl_u = { .ip4_u =
910 { .daddr = iph->daddr,
911 .saddr = iph->saddr,
912 .tos = RT_TOS(iph->tos) } },
913 .proto = IPPROTO_GRE };
914 struct rtable *rt;
96635522 915 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
916 tdev = rt->u.dst.dev;
917 ip_rt_put(rt);
918 }
e1a80002
HX
919
920 if (dev->type != ARPHRD_ETHER)
921 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
922 }
923
924 if (!tdev && tunnel->parms.link)
96635522 925 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
926
927 if (tdev) {
c95b819a 928 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
929 mtu = tdev->mtu;
930 }
931 dev->iflink = tunnel->parms.link;
932
933 /* Precalculate GRE options length */
934 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
935 if (tunnel->parms.o_flags&GRE_CSUM)
936 addend += 4;
937 if (tunnel->parms.o_flags&GRE_KEY)
938 addend += 4;
939 if (tunnel->parms.o_flags&GRE_SEQ)
940 addend += 4;
941 }
c95b819a 942 dev->needed_headroom = addend + hlen;
8cdb0456 943 mtu -= dev->hard_header_len + addend;
42aa9162
HX
944
945 if (mtu < 68)
946 mtu = 68;
947
ee34c1eb
MS
948 tunnel->hlen = addend;
949
42aa9162 950 return mtu;
ee34c1eb
MS
951}
952
1da177e4
LT
953static int
954ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
955{
956 int err = 0;
957 struct ip_tunnel_parm p;
958 struct ip_tunnel *t;
f57e7d5a
PE
959 struct net *net = dev_net(dev);
960 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
961
962 switch (cmd) {
963 case SIOCGETTUNNEL:
964 t = NULL;
7daa0004 965 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
966 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
967 err = -EFAULT;
968 break;
969 }
f57e7d5a 970 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
971 }
972 if (t == NULL)
2941a486 973 t = netdev_priv(dev);
1da177e4
LT
974 memcpy(&p, &t->parms, sizeof(p));
975 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
976 err = -EFAULT;
977 break;
978
979 case SIOCADDTUNNEL:
980 case SIOCCHGTUNNEL:
981 err = -EPERM;
982 if (!capable(CAP_NET_ADMIN))
983 goto done;
984
985 err = -EFAULT;
986 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
987 goto done;
988
989 err = -EINVAL;
990 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
991 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
992 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
993 goto done;
994 if (p.iph.ttl)
995 p.iph.frag_off |= htons(IP_DF);
996
997 if (!(p.i_flags&GRE_KEY))
998 p.i_key = 0;
999 if (!(p.o_flags&GRE_KEY))
1000 p.o_key = 0;
1001
f57e7d5a 1002 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1003
7daa0004 1004 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1005 if (t != NULL) {
1006 if (t->dev != dev) {
1007 err = -EEXIST;
1008 break;
1009 }
1010 } else {
6ed2533e 1011 unsigned nflags = 0;
1da177e4 1012
2941a486 1013 t = netdev_priv(dev);
1da177e4 1014
f97c1e0c 1015 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1016 nflags = IFF_BROADCAST;
1017 else if (p.iph.daddr)
1018 nflags = IFF_POINTOPOINT;
1019
1020 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1021 err = -EINVAL;
1022 break;
1023 }
f57e7d5a 1024 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1025 t->parms.iph.saddr = p.iph.saddr;
1026 t->parms.iph.daddr = p.iph.daddr;
1027 t->parms.i_key = p.i_key;
1028 t->parms.o_key = p.o_key;
1029 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1030 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1031 ipgre_tunnel_link(ign, t);
1da177e4
LT
1032 netdev_state_change(dev);
1033 }
1034 }
1035
1036 if (t) {
1037 err = 0;
1038 if (cmd == SIOCCHGTUNNEL) {
1039 t->parms.iph.ttl = p.iph.ttl;
1040 t->parms.iph.tos = p.iph.tos;
1041 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1042 if (t->parms.link != p.link) {
1043 t->parms.link = p.link;
42aa9162 1044 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1045 netdev_state_change(dev);
1046 }
1da177e4
LT
1047 }
1048 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1049 err = -EFAULT;
1050 } else
1051 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1052 break;
1053
1054 case SIOCDELTUNNEL:
1055 err = -EPERM;
1056 if (!capable(CAP_NET_ADMIN))
1057 goto done;
1058
7daa0004 1059 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1060 err = -EFAULT;
1061 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1062 goto done;
1063 err = -ENOENT;
f57e7d5a 1064 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1065 goto done;
1066 err = -EPERM;
7daa0004 1067 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1068 goto done;
1069 dev = t->dev;
1070 }
22f8cde5
SH
1071 unregister_netdevice(dev);
1072 err = 0;
1da177e4
LT
1073 break;
1074
1075 default:
1076 err = -EINVAL;
1077 }
1078
1079done:
1080 return err;
1081}
1082
1da177e4
LT
1083static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1084{
2941a486 1085 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1086 if (new_mtu < 68 ||
1087 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1088 return -EINVAL;
1089 dev->mtu = new_mtu;
1090 return 0;
1091}
1092
1da177e4
LT
1093/* Nice toy. Unfortunately, useless in real life :-)
1094 It allows to construct virtual multiprotocol broadcast "LAN"
1095 over the Internet, provided multicast routing is tuned.
1096
1097
1098 I have no idea whether this bicycle was invented before me,
1099 so I had to set ARPHRD_IPGRE to a random value.
1100 I have the impression that Cisco could make something similar,
1101 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1102
1da177e4
LT
1103 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1104 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1105
1106 ping -t 255 224.66.66.66
1107
1108 If nobody answers, mbone does not work.
1109
1110 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1111 ip addr add 10.66.66.<somewhat>/24 dev Universe
1112 ifconfig Universe up
1113 ifconfig Universe add fe80::<Your_real_addr>/10
1114 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1115 ftp 10.66.66.66
1116 ...
1117 ftp fec0:6666:6666::193.233.7.65
1118 ...
1119
1120 */
1121
3b04ddde
SH
1122static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1123 unsigned short type,
1124 const void *daddr, const void *saddr, unsigned len)
1da177e4 1125{
2941a486 1126 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1127 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1128 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1129
1130 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1131 p[0] = t->parms.o_flags;
1132 p[1] = htons(type);
1133
1134 /*
e905a9ed 1135 * Set the source hardware address.
1da177e4 1136 */
e905a9ed 1137
1da177e4
LT
1138 if (saddr)
1139 memcpy(&iph->saddr, saddr, 4);
1140
1141 if (daddr) {
1142 memcpy(&iph->daddr, daddr, 4);
1143 return t->hlen;
1144 }
f97c1e0c 1145 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1146 return t->hlen;
e905a9ed 1147
1da177e4
LT
1148 return -t->hlen;
1149}
1150
6a5f44d7
TT
1151static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1152{
6ed2533e 1153 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1154 memcpy(haddr, &iph->saddr, 4);
1155 return 4;
1156}
1157
3b04ddde
SH
1158static const struct header_ops ipgre_header_ops = {
1159 .create = ipgre_header,
6a5f44d7 1160 .parse = ipgre_header_parse,
3b04ddde
SH
1161};
1162
6a5f44d7 1163#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1164static int ipgre_open(struct net_device *dev)
1165{
2941a486 1166 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1167
f97c1e0c 1168 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1169 struct flowi fl = { .oif = t->parms.link,
1170 .nl_u = { .ip4_u =
1171 { .daddr = t->parms.iph.daddr,
1172 .saddr = t->parms.iph.saddr,
1173 .tos = RT_TOS(t->parms.iph.tos) } },
1174 .proto = IPPROTO_GRE };
1175 struct rtable *rt;
96635522 1176 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1177 return -EADDRNOTAVAIL;
1178 dev = rt->u.dst.dev;
1179 ip_rt_put(rt);
e5ed6399 1180 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1181 return -EADDRNOTAVAIL;
1182 t->mlink = dev->ifindex;
e5ed6399 1183 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1184 }
1185 return 0;
1186}
1187
1188static int ipgre_close(struct net_device *dev)
1189{
2941a486 1190 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1191
f97c1e0c 1192 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1193 struct in_device *in_dev;
c346dca1 1194 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1195 if (in_dev) {
1196 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1197 in_dev_put(in_dev);
1198 }
1199 }
1200 return 0;
1201}
1202
1203#endif
1204
b8c26a33
SH
1205static const struct net_device_ops ipgre_netdev_ops = {
1206 .ndo_init = ipgre_tunnel_init,
1207 .ndo_uninit = ipgre_tunnel_uninit,
1208#ifdef CONFIG_NET_IPGRE_BROADCAST
1209 .ndo_open = ipgre_open,
1210 .ndo_stop = ipgre_close,
1211#endif
1212 .ndo_start_xmit = ipgre_tunnel_xmit,
1213 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1214 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1215};
1216
1da177e4
LT
1217static void ipgre_tunnel_setup(struct net_device *dev)
1218{
b8c26a33 1219 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1220 dev->destructor = free_netdev;
1da177e4
LT
1221
1222 dev->type = ARPHRD_IPGRE;
c95b819a 1223 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1224 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1225 dev->flags = IFF_NOARP;
1226 dev->iflink = 0;
1227 dev->addr_len = 4;
0b67eceb 1228 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1229 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1230}
1231
1232static int ipgre_tunnel_init(struct net_device *dev)
1233{
1da177e4
LT
1234 struct ip_tunnel *tunnel;
1235 struct iphdr *iph;
1da177e4 1236
2941a486 1237 tunnel = netdev_priv(dev);
1da177e4
LT
1238 iph = &tunnel->parms.iph;
1239
1240 tunnel->dev = dev;
1241 strcpy(tunnel->parms.name, dev->name);
1242
1243 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1244 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1245
1da177e4 1246 if (iph->daddr) {
1da177e4 1247#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1248 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1249 if (!iph->saddr)
1250 return -EINVAL;
1251 dev->flags = IFF_BROADCAST;
3b04ddde 1252 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1253 }
1254#endif
ee34c1eb 1255 } else
6a5f44d7 1256 dev->header_ops = &ipgre_header_ops;
1da177e4 1257
1da177e4
LT
1258 return 0;
1259}
1260
b8c26a33 1261static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1262{
2941a486 1263 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1264 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1265 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1266
1267 tunnel->dev = dev;
1268 strcpy(tunnel->parms.name, dev->name);
1269
1270 iph->version = 4;
1271 iph->protocol = IPPROTO_GRE;
1272 iph->ihl = 5;
1273 tunnel->hlen = sizeof(struct iphdr) + 4;
1274
1275 dev_hold(dev);
eb8ce741 1276 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1277}
1278
1279
32613090 1280static const struct net_protocol ipgre_protocol = {
1da177e4
LT
1281 .handler = ipgre_rcv,
1282 .err_handler = ipgre_err,
f96c148f 1283 .netns_ok = 1,
1da177e4
LT
1284};
1285
eb8ce741
PE
1286static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1287{
1288 int prio;
1289
1290 for (prio = 0; prio < 4; prio++) {
1291 int h;
1292 for (h = 0; h < HASH_SIZE; h++) {
1293 struct ip_tunnel *t;
1294 while ((t = ign->tunnels[prio][h]) != NULL)
1295 unregister_netdevice(t->dev);
1296 }
1297 }
1298}
1299
59a4c759
PE
1300static int ipgre_init_net(struct net *net)
1301{
1302 int err;
1303 struct ipgre_net *ign;
1304
1305 err = -ENOMEM;
eb8ce741 1306 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1307 if (ign == NULL)
1308 goto err_alloc;
1309
1310 err = net_assign_generic(net, ipgre_net_id, ign);
1311 if (err < 0)
1312 goto err_assign;
1313
7daa0004
PE
1314 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1315 ipgre_tunnel_setup);
1316 if (!ign->fb_tunnel_dev) {
1317 err = -ENOMEM;
1318 goto err_alloc_dev;
1319 }
be77e593 1320 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1321
b8c26a33 1322 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1323 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1324
1325 if ((err = register_netdev(ign->fb_tunnel_dev)))
1326 goto err_reg_dev;
1327
59a4c759
PE
1328 return 0;
1329
7daa0004
PE
1330err_reg_dev:
1331 free_netdev(ign->fb_tunnel_dev);
1332err_alloc_dev:
1333 /* nothing */
59a4c759
PE
1334err_assign:
1335 kfree(ign);
1336err_alloc:
1337 return err;
1338}
1339
1340static void ipgre_exit_net(struct net *net)
1341{
1342 struct ipgre_net *ign;
1343
1344 ign = net_generic(net, ipgre_net_id);
7daa0004 1345 rtnl_lock();
eb8ce741 1346 ipgre_destroy_tunnels(ign);
7daa0004 1347 rtnl_unlock();
59a4c759
PE
1348 kfree(ign);
1349}
1350
1351static struct pernet_operations ipgre_net_ops = {
1352 .init = ipgre_init_net,
1353 .exit = ipgre_exit_net,
1354};
1da177e4 1355
c19e654d
HX
1356static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1357{
1358 __be16 flags;
1359
1360 if (!data)
1361 return 0;
1362
1363 flags = 0;
1364 if (data[IFLA_GRE_IFLAGS])
1365 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366 if (data[IFLA_GRE_OFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (flags & (GRE_VERSION|GRE_ROUTING))
1369 return -EINVAL;
1370
1371 return 0;
1372}
1373
e1a80002
HX
1374static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1375{
1376 __be32 daddr;
1377
1378 if (tb[IFLA_ADDRESS]) {
1379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380 return -EINVAL;
1381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382 return -EADDRNOTAVAIL;
1383 }
1384
1385 if (!data)
1386 goto out;
1387
1388 if (data[IFLA_GRE_REMOTE]) {
1389 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390 if (!daddr)
1391 return -EINVAL;
1392 }
1393
1394out:
1395 return ipgre_tunnel_validate(tb, data);
1396}
1397
c19e654d
HX
1398static void ipgre_netlink_parms(struct nlattr *data[],
1399 struct ip_tunnel_parm *parms)
1400{
7bb82d92 1401 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1402
1403 parms->iph.protocol = IPPROTO_GRE;
1404
1405 if (!data)
1406 return;
1407
1408 if (data[IFLA_GRE_LINK])
1409 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1410
1411 if (data[IFLA_GRE_IFLAGS])
1412 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1413
1414 if (data[IFLA_GRE_OFLAGS])
1415 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1416
1417 if (data[IFLA_GRE_IKEY])
1418 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1419
1420 if (data[IFLA_GRE_OKEY])
1421 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1422
1423 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1424 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1425
1426 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1427 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1428
1429 if (data[IFLA_GRE_TTL])
1430 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1431
1432 if (data[IFLA_GRE_TOS])
1433 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1434
1435 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436 parms->iph.frag_off = htons(IP_DF);
1437}
1438
e1a80002
HX
1439static int ipgre_tap_init(struct net_device *dev)
1440{
1441 struct ip_tunnel *tunnel;
1442
1443 tunnel = netdev_priv(dev);
1444
1445 tunnel->dev = dev;
1446 strcpy(tunnel->parms.name, dev->name);
1447
1448 ipgre_tunnel_bind_dev(dev);
1449
1450 return 0;
1451}
1452
b8c26a33
SH
1453static const struct net_device_ops ipgre_tap_netdev_ops = {
1454 .ndo_init = ipgre_tap_init,
1455 .ndo_uninit = ipgre_tunnel_uninit,
1456 .ndo_start_xmit = ipgre_tunnel_xmit,
1457 .ndo_set_mac_address = eth_mac_addr,
1458 .ndo_validate_addr = eth_validate_addr,
1459 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1460};
1461
e1a80002
HX
1462static void ipgre_tap_setup(struct net_device *dev)
1463{
1464
1465 ether_setup(dev);
1466
b8c26a33 1467 dev->netdev_ops = &ipgre_netdev_ops;
e1a80002 1468 dev->destructor = free_netdev;
e1a80002
HX
1469
1470 dev->iflink = 0;
1471 dev->features |= NETIF_F_NETNS_LOCAL;
1472}
1473
c19e654d
HX
1474static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1475 struct nlattr *data[])
1476{
1477 struct ip_tunnel *nt;
1478 struct net *net = dev_net(dev);
1479 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480 int mtu;
1481 int err;
1482
1483 nt = netdev_priv(dev);
1484 ipgre_netlink_parms(data, &nt->parms);
1485
e1a80002 1486 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1487 return -EEXIST;
1488
e1a80002
HX
1489 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490 random_ether_addr(dev->dev_addr);
1491
c19e654d
HX
1492 mtu = ipgre_tunnel_bind_dev(dev);
1493 if (!tb[IFLA_MTU])
1494 dev->mtu = mtu;
1495
1496 err = register_netdevice(dev);
1497 if (err)
1498 goto out;
1499
1500 dev_hold(dev);
1501 ipgre_tunnel_link(ign, nt);
1502
1503out:
1504 return err;
1505}
1506
1507static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508 struct nlattr *data[])
1509{
1510 struct ip_tunnel *t, *nt;
1511 struct net *net = dev_net(dev);
1512 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513 struct ip_tunnel_parm p;
1514 int mtu;
1515
1516 if (dev == ign->fb_tunnel_dev)
1517 return -EINVAL;
1518
1519 nt = netdev_priv(dev);
1520 ipgre_netlink_parms(data, &p);
1521
1522 t = ipgre_tunnel_locate(net, &p, 0);
1523
1524 if (t) {
1525 if (t->dev != dev)
1526 return -EEXIST;
1527 } else {
1528 unsigned nflags = 0;
1529
1530 t = nt;
1531
1532 if (ipv4_is_multicast(p.iph.daddr))
1533 nflags = IFF_BROADCAST;
1534 else if (p.iph.daddr)
1535 nflags = IFF_POINTOPOINT;
1536
1537 if ((dev->flags ^ nflags) &
1538 (IFF_POINTOPOINT | IFF_BROADCAST))
1539 return -EINVAL;
1540
1541 ipgre_tunnel_unlink(ign, t);
1542 t->parms.iph.saddr = p.iph.saddr;
1543 t->parms.iph.daddr = p.iph.daddr;
1544 t->parms.i_key = p.i_key;
1545 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1546 memcpy(dev->broadcast, &p.iph.daddr, 4);
1547 ipgre_tunnel_link(ign, t);
1548 netdev_state_change(dev);
1549 }
1550
1551 t->parms.o_key = p.o_key;
1552 t->parms.iph.ttl = p.iph.ttl;
1553 t->parms.iph.tos = p.iph.tos;
1554 t->parms.iph.frag_off = p.iph.frag_off;
1555
1556 if (t->parms.link != p.link) {
1557 t->parms.link = p.link;
1558 mtu = ipgre_tunnel_bind_dev(dev);
1559 if (!tb[IFLA_MTU])
1560 dev->mtu = mtu;
1561 netdev_state_change(dev);
1562 }
1563
1564 return 0;
1565}
1566
1567static size_t ipgre_get_size(const struct net_device *dev)
1568{
1569 return
1570 /* IFLA_GRE_LINK */
1571 nla_total_size(4) +
1572 /* IFLA_GRE_IFLAGS */
1573 nla_total_size(2) +
1574 /* IFLA_GRE_OFLAGS */
1575 nla_total_size(2) +
1576 /* IFLA_GRE_IKEY */
1577 nla_total_size(4) +
1578 /* IFLA_GRE_OKEY */
1579 nla_total_size(4) +
1580 /* IFLA_GRE_LOCAL */
1581 nla_total_size(4) +
1582 /* IFLA_GRE_REMOTE */
1583 nla_total_size(4) +
1584 /* IFLA_GRE_TTL */
1585 nla_total_size(1) +
1586 /* IFLA_GRE_TOS */
1587 nla_total_size(1) +
1588 /* IFLA_GRE_PMTUDISC */
1589 nla_total_size(1) +
1590 0;
1591}
1592
1593static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1594{
1595 struct ip_tunnel *t = netdev_priv(dev);
1596 struct ip_tunnel_parm *p = &t->parms;
1597
1598 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1599 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1600 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1601 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1602 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1603 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1604 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1605 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1606 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1607 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1608
1609 return 0;
1610
1611nla_put_failure:
1612 return -EMSGSIZE;
1613}
1614
1615static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1616 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1617 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1618 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1619 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1620 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1621 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1622 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1623 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1624 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1625 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1626};
1627
1628static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1629 .kind = "gre",
1630 .maxtype = IFLA_GRE_MAX,
1631 .policy = ipgre_policy,
1632 .priv_size = sizeof(struct ip_tunnel),
1633 .setup = ipgre_tunnel_setup,
1634 .validate = ipgre_tunnel_validate,
1635 .newlink = ipgre_newlink,
1636 .changelink = ipgre_changelink,
1637 .get_size = ipgre_get_size,
1638 .fill_info = ipgre_fill_info,
1639};
1640
e1a80002
HX
1641static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1642 .kind = "gretap",
1643 .maxtype = IFLA_GRE_MAX,
1644 .policy = ipgre_policy,
1645 .priv_size = sizeof(struct ip_tunnel),
1646 .setup = ipgre_tap_setup,
1647 .validate = ipgre_tap_validate,
1648 .newlink = ipgre_newlink,
1649 .changelink = ipgre_changelink,
1650 .get_size = ipgre_get_size,
1651 .fill_info = ipgre_fill_info,
1652};
1653
1da177e4
LT
1654/*
1655 * And now the modules code and kernel interface.
1656 */
1657
1658static int __init ipgre_init(void)
1659{
1660 int err;
1661
1662 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1663
1664 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1665 printk(KERN_INFO "ipgre init: can't add protocol\n");
1666 return -EAGAIN;
1667 }
1668
59a4c759
PE
1669 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1670 if (err < 0)
c19e654d 1671 goto gen_device_failed;
7daa0004 1672
c19e654d
HX
1673 err = rtnl_link_register(&ipgre_link_ops);
1674 if (err < 0)
1675 goto rtnl_link_failed;
1676
e1a80002
HX
1677 err = rtnl_link_register(&ipgre_tap_ops);
1678 if (err < 0)
1679 goto tap_ops_failed;
1680
c19e654d 1681out:
1da177e4 1682 return err;
c19e654d 1683
e1a80002
HX
1684tap_ops_failed:
1685 rtnl_link_unregister(&ipgre_link_ops);
c19e654d
HX
1686rtnl_link_failed:
1687 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1688gen_device_failed:
1689 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1690 goto out;
1da177e4
LT
1691}
1692
db44575f 1693static void __exit ipgre_fini(void)
1da177e4 1694{
e1a80002 1695 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d
HX
1696 rtnl_link_unregister(&ipgre_link_ops);
1697 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1698 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1699 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1da177e4
LT
1700}
1701
1702module_init(ipgre_init);
1703module_exit(ipgre_fini);
1704MODULE_LICENSE("GPL");
4d74f8ba
PM
1705MODULE_ALIAS_RTNL_LINK("gre");
1706MODULE_ALIAS_RTNL_LINK("gretap");