]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
ipip: get rid of ipip_lock
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4 16#include <linux/kernel.h>
5a0e3ad6 17#include <linux/slab.h>
1da177e4
LT
18#include <asm/uaccess.h>
19#include <linux/skbuff.h>
20#include <linux/netdevice.h>
21#include <linux/in.h>
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/if_arp.h>
25#include <linux/mroute.h>
26#include <linux/init.h>
27#include <linux/in6.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/netfilter_ipv4.h>
e1a80002 31#include <linux/etherdevice.h>
46f25dff 32#include <linux/if_ether.h>
1da177e4
LT
33
34#include <net/sock.h>
35#include <net/ip.h>
36#include <net/icmp.h>
37#include <net/protocol.h>
38#include <net/ipip.h>
39#include <net/arp.h>
40#include <net/checksum.h>
41#include <net/dsfield.h>
42#include <net/inet_ecn.h>
43#include <net/xfrm.h>
59a4c759
PE
44#include <net/net_namespace.h>
45#include <net/netns/generic.h>
c19e654d 46#include <net/rtnetlink.h>
00959ade 47#include <net/gre.h>
1da177e4
LT
48
49#ifdef CONFIG_IPV6
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#endif
54
55/*
56 Problems & solutions
57 --------------------
58
59 1. The most important issue is detecting local dead loops.
60 They would cause complete host lockup in transmit, which
61 would be "resolved" by stack overflow or, if queueing is enabled,
62 with infinite looping in net_bh.
63
64 We cannot track such dead loops during route installation,
65 it is infeasible task. The most general solutions would be
66 to keep skb->encapsulation counter (sort of local ttl),
67 and silently drop packet when it expires. It is the best
68 solution, but it supposes maintaining a new variable in ALL
69 skb, even if no tunneling is used.
70
a43912ab 71 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
72
73
74
75 2. Networking dead loops would not kill routers, but would really
76 kill network. IP hop limit plays role of "t->recursion" in this case,
77 if we copy it from packet being encapsulated to upper header.
78 It is very good solution, but it introduces two problems:
79
80 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
81 do not work over tunnels.
82 - traceroute does not work. I planned to relay ICMP from tunnel,
83 so that this problem would be solved and traceroute output
84 would be even more informative. This idea appeared to be wrong:
85 only Linux complies to rfc1812 now (yes, guys, Linux is the only
86 true router now :-)), all routers (at least, in neighbourhood of mine)
87 return only 8 bytes of payload. It is the end.
88
89 Hence, if we want that OSPF worked or traceroute said something reasonable,
90 we should search for another solution.
91
92 One of them is to parse packet trying to detect inner encapsulation
93 made by our node. It is difficult or even impossible, especially,
94 taking into account fragmentation. TO be short, tt is not solution at all.
95
96 Current solution: The solution was UNEXPECTEDLY SIMPLE.
97 We force DF flag on tunnels with preconfigured hop limit,
98 that is ALL. :-) Well, it does not remove the problem completely,
99 but exponential growth of network traffic is changed to linear
100 (branches, that exceed pmtu are pruned) and tunnel mtu
101 quickly degrades to a value <68, where looping stops.
102 Yes, it is not good if there exists a router in the loop,
103 which does not force DF, even when encapsulating packets have DF set.
104 But it is not our problem! Nobody could accuse us, we made
105 all that we could make. Even if it is your gated who injected
106 fatal route to network, even if it were you who configured
107 fatal static route: you are innocent. :-)
108
109
110
111 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
112 practically identical code. It would be good to glue them
113 together, but it is not very evident, how to make them modular.
114 sit is integral part of IPv6, ipip and gre are naturally modular.
115 We could extract common parts (hash table, ioctl etc)
116 to a separate module (ip_tunnel.c).
117
118 Alexey Kuznetsov.
119 */
120
c19e654d 121static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
122static int ipgre_tunnel_init(struct net_device *dev);
123static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 124static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
125
126/* Fallback tunnel: no source, no destination, no key, no options */
127
eb8ce741
PE
128#define HASH_SIZE 16
129
f99189b1 130static int ipgre_net_id __read_mostly;
59a4c759 131struct ipgre_net {
eb8ce741
PE
132 struct ip_tunnel *tunnels[4][HASH_SIZE];
133
7daa0004 134 struct net_device *fb_tunnel_dev;
59a4c759
PE
135};
136
1da177e4
LT
137/* Tunnel hash table */
138
139/*
140 4 hash tables:
141
142 3: (remote,local)
143 2: (remote,*)
144 1: (*,local)
145 0: (*,*)
146
147 We require exact key match i.e. if a key is present in packet
148 it will match only tunnel with the same key; if it is not present,
149 it will match only keyless tunnel.
150
151 All keyless packets that do not match a configured keyless tunnel
152 will match the fallback tunnel.
153 */
154
/*
 * Fold a 32-bit address or key into a 4-bit bucket index.
 * The argument is parenthesized so that passing an expression (not just a
 * plain identifier) cannot change how the cast and shift bind.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)

/* Shorthand names for the four per-namespace hash tables. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*) */
#define tunnels_l	tunnels[1]	/* (*,local) */
#define tunnels_wc	tunnels[0]	/* (*,*) */
/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

/*
 * Walk one hash chain with rcu_dereference().  The loop cursor is a
 * variable named 't' that the calling function must declare.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1da177e4
LT
168
169/* Given src, dst and key, find appropriate for input tunnel. */
170
749c10f9 171static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
172 __be32 remote, __be32 local,
173 __be32 key, __be16 gre_proto)
1da177e4 174{
749c10f9
TT
175 struct net *net = dev_net(dev);
176 int link = dev->ifindex;
1da177e4
LT
177 unsigned h0 = HASH(remote);
178 unsigned h1 = HASH(key);
afcf1242 179 struct ip_tunnel *t, *cand = NULL;
7daa0004 180 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
181 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
182 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 183 int score, cand_score = 4;
1da177e4 184
8d5b2c08 185 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
186 if (local != t->parms.iph.saddr ||
187 remote != t->parms.iph.daddr ||
188 key != t->parms.i_key ||
189 !(t->dev->flags & IFF_UP))
190 continue;
191
192 if (t->dev->type != ARPHRD_IPGRE &&
193 t->dev->type != dev_type)
194 continue;
195
afcf1242 196 score = 0;
749c10f9 197 if (t->parms.link != link)
afcf1242 198 score |= 1;
749c10f9 199 if (t->dev->type != dev_type)
afcf1242
TT
200 score |= 2;
201 if (score == 0)
749c10f9 202 return t;
afcf1242
TT
203
204 if (score < cand_score) {
205 cand = t;
206 cand_score = score;
207 }
1da177e4 208 }
e1a80002 209
8d5b2c08 210 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9
TT
211 if (remote != t->parms.iph.daddr ||
212 key != t->parms.i_key ||
213 !(t->dev->flags & IFF_UP))
214 continue;
215
216 if (t->dev->type != ARPHRD_IPGRE &&
217 t->dev->type != dev_type)
218 continue;
219
afcf1242 220 score = 0;
749c10f9 221 if (t->parms.link != link)
afcf1242 222 score |= 1;
749c10f9 223 if (t->dev->type != dev_type)
afcf1242
TT
224 score |= 2;
225 if (score == 0)
749c10f9 226 return t;
afcf1242
TT
227
228 if (score < cand_score) {
229 cand = t;
230 cand_score = score;
231 }
1da177e4 232 }
e1a80002 233
8d5b2c08 234 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
235 if ((local != t->parms.iph.saddr &&
236 (local != t->parms.iph.daddr ||
237 !ipv4_is_multicast(local))) ||
238 key != t->parms.i_key ||
239 !(t->dev->flags & IFF_UP))
240 continue;
241
242 if (t->dev->type != ARPHRD_IPGRE &&
243 t->dev->type != dev_type)
244 continue;
245
afcf1242 246 score = 0;
749c10f9 247 if (t->parms.link != link)
afcf1242 248 score |= 1;
749c10f9 249 if (t->dev->type != dev_type)
afcf1242
TT
250 score |= 2;
251 if (score == 0)
749c10f9 252 return t;
afcf1242
TT
253
254 if (score < cand_score) {
255 cand = t;
256 cand_score = score;
257 }
1da177e4 258 }
e1a80002 259
8d5b2c08 260 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
261 if (t->parms.i_key != key ||
262 !(t->dev->flags & IFF_UP))
263 continue;
264
265 if (t->dev->type != ARPHRD_IPGRE &&
266 t->dev->type != dev_type)
267 continue;
268
afcf1242 269 score = 0;
749c10f9 270 if (t->parms.link != link)
afcf1242 271 score |= 1;
749c10f9 272 if (t->dev->type != dev_type)
afcf1242
TT
273 score |= 2;
274 if (score == 0)
749c10f9 275 return t;
afcf1242
TT
276
277 if (score < cand_score) {
278 cand = t;
279 cand_score = score;
280 }
1da177e4
LT
281 }
282
afcf1242
TT
283 if (cand != NULL)
284 return cand;
e1a80002 285
8d5b2c08
ED
286 dev = ign->fb_tunnel_dev;
287 if (dev->flags & IFF_UP)
288 return netdev_priv(dev);
749c10f9 289
1da177e4
LT
290 return NULL;
291}
292
f57e7d5a
PE
293static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
294 struct ip_tunnel_parm *parms)
1da177e4 295{
5056a1ef
YH
296 __be32 remote = parms->iph.daddr;
297 __be32 local = parms->iph.saddr;
298 __be32 key = parms->i_key;
1da177e4
LT
299 unsigned h = HASH(key);
300 int prio = 0;
301
302 if (local)
303 prio |= 1;
f97c1e0c 304 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
305 prio |= 2;
306 h ^= HASH(remote);
307 }
308
eb8ce741 309 return &ign->tunnels[prio][h];
1da177e4
LT
310}
311
f57e7d5a
PE
312static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
313 struct ip_tunnel *t)
5056a1ef 314{
f57e7d5a 315 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
316}
317
f57e7d5a 318static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 319{
f57e7d5a 320 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4 321
8d5b2c08 322 spin_lock_bh(&ipgre_lock);
1da177e4 323 t->next = *tp;
8d5b2c08
ED
324 rcu_assign_pointer(*tp, t);
325 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
326}
327
f57e7d5a 328static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
329{
330 struct ip_tunnel **tp;
331
f57e7d5a 332 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4 333 if (t == *tp) {
8d5b2c08 334 spin_lock_bh(&ipgre_lock);
1da177e4 335 *tp = t->next;
8d5b2c08 336 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
337 break;
338 }
339 }
340}
341
e1a80002
HX
342static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
343 struct ip_tunnel_parm *parms,
344 int type)
1da177e4 345{
d5a0a1e3
AV
346 __be32 remote = parms->iph.daddr;
347 __be32 local = parms->iph.saddr;
348 __be32 key = parms->i_key;
749c10f9 349 int link = parms->link;
e1a80002
HX
350 struct ip_tunnel *t, **tp;
351 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
352
353 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
354 if (local == t->parms.iph.saddr &&
355 remote == t->parms.iph.daddr &&
356 key == t->parms.i_key &&
749c10f9 357 link == t->parms.link &&
e1a80002
HX
358 type == t->dev->type)
359 break;
360
361 return t;
362}
363
364static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
365 struct ip_tunnel_parm *parms, int create)
366{
367 struct ip_tunnel *t, *nt;
1da177e4 368 struct net_device *dev;
1da177e4 369 char name[IFNAMSIZ];
f57e7d5a 370 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 371
e1a80002
HX
372 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
373 if (t || !create)
374 return t;
1da177e4
LT
375
376 if (parms->name[0])
377 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
378 else
379 sprintf(name, "gre%%d");
1da177e4
LT
380
381 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
382 if (!dev)
383 return NULL;
384
0b67eceb
PE
385 dev_net_set(dev, net);
386
b37d428b
PE
387 if (strchr(name, '%')) {
388 if (dev_alloc_name(dev, name) < 0)
389 goto failed_free;
390 }
391
2941a486 392 nt = netdev_priv(dev);
1da177e4 393 nt->parms = *parms;
c19e654d 394 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 395
42aa9162
HX
396 dev->mtu = ipgre_tunnel_bind_dev(dev);
397
b37d428b
PE
398 if (register_netdevice(dev) < 0)
399 goto failed_free;
1da177e4 400
1da177e4 401 dev_hold(dev);
f57e7d5a 402 ipgre_tunnel_link(ign, nt);
1da177e4
LT
403 return nt;
404
b37d428b
PE
405failed_free:
406 free_netdev(dev);
1da177e4
LT
407 return NULL;
408}
409
410static void ipgre_tunnel_uninit(struct net_device *dev)
411{
f57e7d5a
PE
412 struct net *net = dev_net(dev);
413 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
414
415 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
416 dev_put(dev);
417}
418
419
420static void ipgre_err(struct sk_buff *skb, u32 info)
421{
1da177e4 422
071f92d0 423/* All the routers (except for Linux) return only
1da177e4
LT
424 8 bytes of packet payload. It means, that precise relaying of
425 ICMP in the real Internet is absolutely infeasible.
426
427 Moreover, Cisco "wise men" put GRE key to the third word
428 in GRE header. It makes impossible maintaining even soft state for keyed
429 GRE tunnels with enabled checksum. Tell them "thank you".
430
431 Well, I wonder, rfc1812 was written by Cisco employee,
432 what the hell these idiots break standrads established
433 by themself???
434 */
435
6ed2533e 436 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 437 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 438 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
439 const int type = icmp_hdr(skb)->type;
440 const int code = icmp_hdr(skb)->code;
1da177e4 441 struct ip_tunnel *t;
d5a0a1e3 442 __be16 flags;
1da177e4
LT
443
444 flags = p[0];
445 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
446 if (flags&(GRE_VERSION|GRE_ROUTING))
447 return;
448 if (flags&GRE_KEY) {
449 grehlen += 4;
450 if (flags&GRE_CSUM)
451 grehlen += 4;
452 }
453 }
454
455 /* If only 8 bytes returned, keyed message will be dropped here */
456 if (skb_headlen(skb) < grehlen)
457 return;
458
459 switch (type) {
460 default:
461 case ICMP_PARAMETERPROB:
462 return;
463
464 case ICMP_DEST_UNREACH:
465 switch (code) {
466 case ICMP_SR_FAILED:
467 case ICMP_PORT_UNREACH:
468 /* Impossible event. */
469 return;
470 case ICMP_FRAG_NEEDED:
471 /* Soft state for pmtu is maintained by IP core. */
472 return;
473 default:
474 /* All others are translated to HOST_UNREACH.
475 rfc2003 contains "deep thoughts" about NET_UNREACH,
476 I believe they are just ether pollution. --ANK
477 */
478 break;
479 }
480 break;
481 case ICMP_TIME_EXCEEDED:
482 if (code != ICMP_EXC_TTL)
483 return;
484 break;
485 }
486
8d5b2c08 487 rcu_read_lock();
749c10f9 488 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
489 flags & GRE_KEY ?
490 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
491 p[1]);
f97c1e0c
JP
492 if (t == NULL || t->parms.iph.daddr == 0 ||
493 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
494 goto out;
495
496 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
497 goto out;
498
da6185d8 499 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
500 t->err_count++;
501 else
502 t->err_count = 1;
503 t->err_time = jiffies;
504out:
8d5b2c08 505 rcu_read_unlock();
1da177e4
LT
506}
507
508static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
509{
510 if (INET_ECN_is_ce(iph->tos)) {
511 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 512 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 513 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 514 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
515 }
516 }
517}
518
519static inline u8
520ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
521{
522 u8 inner = 0;
523 if (skb->protocol == htons(ETH_P_IP))
524 inner = old_iph->tos;
525 else if (skb->protocol == htons(ETH_P_IPV6))
526 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
527 return INET_ECN_encapsulate(tos, inner);
528}
529
530static int ipgre_rcv(struct sk_buff *skb)
531{
532 struct iphdr *iph;
533 u8 *h;
d5a0a1e3 534 __be16 flags;
d3bc23e7 535 __sum16 csum = 0;
d5a0a1e3 536 __be32 key = 0;
1da177e4
LT
537 u32 seqno = 0;
538 struct ip_tunnel *tunnel;
539 int offset = 4;
e1a80002 540 __be16 gre_proto;
1da177e4
LT
541
542 if (!pskb_may_pull(skb, 16))
543 goto drop_nolock;
544
eddc9ec5 545 iph = ip_hdr(skb);
1da177e4 546 h = skb->data;
d5a0a1e3 547 flags = *(__be16*)h;
1da177e4
LT
548
549 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
550 /* - Version must be 0.
551 - We do not support routing headers.
552 */
553 if (flags&(GRE_VERSION|GRE_ROUTING))
554 goto drop_nolock;
555
556 if (flags&GRE_CSUM) {
fb286bb2 557 switch (skb->ip_summed) {
84fa7933 558 case CHECKSUM_COMPLETE:
d3bc23e7 559 csum = csum_fold(skb->csum);
fb286bb2
HX
560 if (!csum)
561 break;
562 /* fall through */
563 case CHECKSUM_NONE:
564 skb->csum = 0;
565 csum = __skb_checksum_complete(skb);
84fa7933 566 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
567 }
568 offset += 4;
569 }
570 if (flags&GRE_KEY) {
d5a0a1e3 571 key = *(__be32*)(h + offset);
1da177e4
LT
572 offset += 4;
573 }
574 if (flags&GRE_SEQ) {
d5a0a1e3 575 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
576 offset += 4;
577 }
578 }
579
e1a80002
HX
580 gre_proto = *(__be16 *)(h + 2);
581
8d5b2c08 582 rcu_read_lock();
749c10f9 583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
584 iph->saddr, iph->daddr, key,
585 gre_proto))) {
addd68eb
PE
586 struct net_device_stats *stats = &tunnel->dev->stats;
587
1da177e4
LT
588 secpath_reset(skb);
589
e1a80002 590 skb->protocol = gre_proto;
1da177e4
LT
591 /* WCCP version 1 and 2 protocol decoding.
592 * - Change protocol to IP
593 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
594 */
e1a80002 595 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 596 skb->protocol = htons(ETH_P_IP);
e905a9ed 597 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
598 offset += 4;
599 }
600
1d069167 601 skb->mac_header = skb->network_header;
4209fb60 602 __pskb_pull(skb, offset);
9c70220b 603 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
604 skb->pkt_type = PACKET_HOST;
605#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 606 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 607 /* Looped back packet, drop it! */
511c3f92 608 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 609 goto drop;
addd68eb 610 stats->multicast++;
1da177e4
LT
611 skb->pkt_type = PACKET_BROADCAST;
612 }
613#endif
614
615 if (((flags&GRE_CSUM) && csum) ||
616 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
617 stats->rx_crc_errors++;
618 stats->rx_errors++;
1da177e4
LT
619 goto drop;
620 }
621 if (tunnel->parms.i_flags&GRE_SEQ) {
622 if (!(flags&GRE_SEQ) ||
623 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
624 stats->rx_fifo_errors++;
625 stats->rx_errors++;
1da177e4
LT
626 goto drop;
627 }
628 tunnel->i_seqno = seqno + 1;
629 }
e1a80002
HX
630
631 /* Warning: All skb pointers will be invalidated! */
632 if (tunnel->dev->type == ARPHRD_ETHER) {
633 if (!pskb_may_pull(skb, ETH_HLEN)) {
634 stats->rx_length_errors++;
635 stats->rx_errors++;
636 goto drop;
637 }
638
639 iph = ip_hdr(skb);
640 skb->protocol = eth_type_trans(skb, tunnel->dev);
641 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
642 }
643
d19d56dd 644 skb_tunnel_rx(skb, tunnel->dev);
e1a80002
HX
645
646 skb_reset_network_header(skb);
1da177e4 647 ipgre_ecn_decapsulate(iph, skb);
e1a80002 648
1da177e4 649 netif_rx(skb);
8d5b2c08 650 rcu_read_unlock();
1da177e4
LT
651 return(0);
652 }
45af08be 653 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
654
655drop:
8d5b2c08 656 rcu_read_unlock();
1da177e4
LT
657drop_nolock:
658 kfree_skb(skb);
659 return(0);
660}
661
6fef4c0c 662static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 663{
2941a486 664 struct ip_tunnel *tunnel = netdev_priv(dev);
0bfbedb1
ED
665 struct net_device_stats *stats = &dev->stats;
666 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
eddc9ec5 667 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
668 struct iphdr *tiph;
669 u8 tos;
d5a0a1e3 670 __be16 df;
1da177e4
LT
671 struct rtable *rt; /* Route to the other host */
672 struct net_device *tdev; /* Device to other host */
673 struct iphdr *iph; /* Our new IP header */
c2636b4d 674 unsigned int max_headroom; /* The extra header space needed */
1da177e4 675 int gre_hlen;
d5a0a1e3 676 __be32 dst;
1da177e4
LT
677 int mtu;
678
e1a80002
HX
679 if (dev->type == ARPHRD_ETHER)
680 IPCB(skb)->flags = 0;
681
682 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 683 gre_hlen = 0;
6ed2533e 684 tiph = (struct iphdr *)skb->data;
1da177e4
LT
685 } else {
686 gre_hlen = tunnel->hlen;
687 tiph = &tunnel->parms.iph;
688 }
689
690 if ((dst = tiph->daddr) == 0) {
691 /* NBMA tunnel */
692
adf30907 693 if (skb_dst(skb) == NULL) {
addd68eb 694 stats->tx_fifo_errors++;
1da177e4
LT
695 goto tx_error;
696 }
697
698 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 699 rt = skb_rtable(skb);
1da177e4
LT
700 if ((dst = rt->rt_gateway) == 0)
701 goto tx_error_icmp;
702 }
703#ifdef CONFIG_IPV6
704 else if (skb->protocol == htons(ETH_P_IPV6)) {
705 struct in6_addr *addr6;
706 int addr_type;
adf30907 707 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
708
709 if (neigh == NULL)
710 goto tx_error;
711
6ed2533e 712 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
713 addr_type = ipv6_addr_type(addr6);
714
715 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 716 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
717 addr_type = ipv6_addr_type(addr6);
718 }
719
720 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
721 goto tx_error_icmp;
722
723 dst = addr6->s6_addr32[3];
724 }
725#endif
726 else
727 goto tx_error;
728 }
729
730 tos = tiph->tos;
ee686ca9
AJ
731 if (tos == 1) {
732 tos = 0;
1da177e4
LT
733 if (skb->protocol == htons(ETH_P_IP))
734 tos = old_iph->tos;
dd4ba83d
SH
735 else if (skb->protocol == htons(ETH_P_IPV6))
736 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
1da177e4
LT
737 }
738
739 {
740 struct flowi fl = { .oif = tunnel->parms.link,
741 .nl_u = { .ip4_u =
742 { .daddr = dst,
743 .saddr = tiph->saddr,
744 .tos = RT_TOS(tos) } },
745 .proto = IPPROTO_GRE };
96635522 746 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 747 stats->tx_carrier_errors++;
1da177e4
LT
748 goto tx_error;
749 }
750 }
d8d1f30b 751 tdev = rt->dst.dev;
1da177e4
LT
752
753 if (tdev == dev) {
754 ip_rt_put(rt);
addd68eb 755 stats->collisions++;
1da177e4
LT
756 goto tx_error;
757 }
758
759 df = tiph->frag_off;
760 if (df)
d8d1f30b 761 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 762 else
adf30907 763 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 764
adf30907
ED
765 if (skb_dst(skb))
766 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
767
768 if (skb->protocol == htons(ETH_P_IP)) {
769 df |= (old_iph->frag_off&htons(IP_DF));
770
771 if ((old_iph->frag_off&htons(IP_DF)) &&
772 mtu < ntohs(old_iph->tot_len)) {
773 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
774 ip_rt_put(rt);
775 goto tx_error;
776 }
777 }
778#ifdef CONFIG_IPV6
779 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 780 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 781
adf30907 782 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
783 if ((tunnel->parms.iph.daddr &&
784 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
785 rt6->rt6i_dst.plen == 128) {
786 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 787 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
788 }
789 }
790
791 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
3ffe533c 792 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1da177e4
LT
793 ip_rt_put(rt);
794 goto tx_error;
795 }
796 }
797#endif
798
799 if (tunnel->err_count > 0) {
da6185d8
WY
800 if (time_before(jiffies,
801 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
802 tunnel->err_count--;
803
804 dst_link_failure(skb);
805 } else
806 tunnel->err_count = 0;
807 }
808
d8d1f30b 809 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
1da177e4 810
cfbba49d
PM
811 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
812 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4 813 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
243aad83
TT
814 if (max_headroom > dev->needed_headroom)
815 dev->needed_headroom = max_headroom;
1da177e4
LT
816 if (!new_skb) {
817 ip_rt_put(rt);
0bfbedb1 818 txq->tx_dropped++;
1da177e4 819 dev_kfree_skb(skb);
6ed10654 820 return NETDEV_TX_OK;
1da177e4
LT
821 }
822 if (skb->sk)
823 skb_set_owner_w(new_skb, skb->sk);
824 dev_kfree_skb(skb);
825 skb = new_skb;
eddc9ec5 826 old_iph = ip_hdr(skb);
1da177e4
LT
827 }
828
64194c31 829 skb_reset_transport_header(skb);
e2d1bca7
ACM
830 skb_push(skb, gre_hlen);
831 skb_reset_network_header(skb);
1da177e4 832 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
833 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
834 IPSKB_REROUTED);
adf30907 835 skb_dst_drop(skb);
d8d1f30b 836 skb_dst_set(skb, &rt->dst);
1da177e4
LT
837
838 /*
839 * Push down and install the IPIP header.
840 */
841
eddc9ec5 842 iph = ip_hdr(skb);
1da177e4
LT
843 iph->version = 4;
844 iph->ihl = sizeof(struct iphdr) >> 2;
845 iph->frag_off = df;
846 iph->protocol = IPPROTO_GRE;
847 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
848 iph->daddr = rt->rt_dst;
849 iph->saddr = rt->rt_src;
850
851 if ((iph->ttl = tiph->ttl) == 0) {
852 if (skb->protocol == htons(ETH_P_IP))
853 iph->ttl = old_iph->ttl;
854#ifdef CONFIG_IPV6
855 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 856 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
857#endif
858 else
d8d1f30b 859 iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
1da177e4
LT
860 }
861
e1a80002
HX
862 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
863 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
864 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
865
866 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 867 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
868
869 if (tunnel->parms.o_flags&GRE_SEQ) {
870 ++tunnel->o_seqno;
871 *ptr = htonl(tunnel->o_seqno);
872 ptr--;
873 }
874 if (tunnel->parms.o_flags&GRE_KEY) {
875 *ptr = tunnel->parms.o_key;
876 ptr--;
877 }
878 if (tunnel->parms.o_flags&GRE_CSUM) {
879 *ptr = 0;
5f92a738 880 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
881 }
882 }
883
884 nf_reset(skb);
885
886 IPTUNNEL_XMIT();
6ed10654 887 return NETDEV_TX_OK;
1da177e4
LT
888
889tx_error_icmp:
890 dst_link_failure(skb);
891
892tx_error:
893 stats->tx_errors++;
894 dev_kfree_skb(skb);
6ed10654 895 return NETDEV_TX_OK;
1da177e4
LT
896}
897
42aa9162 898static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
899{
900 struct net_device *tdev = NULL;
901 struct ip_tunnel *tunnel;
902 struct iphdr *iph;
903 int hlen = LL_MAX_HEADER;
904 int mtu = ETH_DATA_LEN;
905 int addend = sizeof(struct iphdr) + 4;
906
907 tunnel = netdev_priv(dev);
908 iph = &tunnel->parms.iph;
909
c95b819a 910 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
911
912 if (iph->daddr) {
913 struct flowi fl = { .oif = tunnel->parms.link,
914 .nl_u = { .ip4_u =
915 { .daddr = iph->daddr,
916 .saddr = iph->saddr,
917 .tos = RT_TOS(iph->tos) } },
918 .proto = IPPROTO_GRE };
919 struct rtable *rt;
96635522 920 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
d8d1f30b 921 tdev = rt->dst.dev;
ee34c1eb
MS
922 ip_rt_put(rt);
923 }
e1a80002
HX
924
925 if (dev->type != ARPHRD_ETHER)
926 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
927 }
928
929 if (!tdev && tunnel->parms.link)
96635522 930 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
931
932 if (tdev) {
c95b819a 933 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
934 mtu = tdev->mtu;
935 }
936 dev->iflink = tunnel->parms.link;
937
938 /* Precalculate GRE options length */
939 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
940 if (tunnel->parms.o_flags&GRE_CSUM)
941 addend += 4;
942 if (tunnel->parms.o_flags&GRE_KEY)
943 addend += 4;
944 if (tunnel->parms.o_flags&GRE_SEQ)
945 addend += 4;
946 }
c95b819a 947 dev->needed_headroom = addend + hlen;
8cdb0456 948 mtu -= dev->hard_header_len + addend;
42aa9162
HX
949
950 if (mtu < 68)
951 mtu = 68;
952
ee34c1eb
MS
953 tunnel->hlen = addend;
954
42aa9162 955 return mtu;
ee34c1eb
MS
956}
957
1da177e4
LT
958static int
959ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
960{
961 int err = 0;
962 struct ip_tunnel_parm p;
963 struct ip_tunnel *t;
f57e7d5a
PE
964 struct net *net = dev_net(dev);
965 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
966
967 switch (cmd) {
968 case SIOCGETTUNNEL:
969 t = NULL;
7daa0004 970 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
971 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
972 err = -EFAULT;
973 break;
974 }
f57e7d5a 975 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
976 }
977 if (t == NULL)
2941a486 978 t = netdev_priv(dev);
1da177e4
LT
979 memcpy(&p, &t->parms, sizeof(p));
980 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
981 err = -EFAULT;
982 break;
983
984 case SIOCADDTUNNEL:
985 case SIOCCHGTUNNEL:
986 err = -EPERM;
987 if (!capable(CAP_NET_ADMIN))
988 goto done;
989
990 err = -EFAULT;
991 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
992 goto done;
993
994 err = -EINVAL;
995 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
996 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
997 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
998 goto done;
999 if (p.iph.ttl)
1000 p.iph.frag_off |= htons(IP_DF);
1001
1002 if (!(p.i_flags&GRE_KEY))
1003 p.i_key = 0;
1004 if (!(p.o_flags&GRE_KEY))
1005 p.o_key = 0;
1006
f57e7d5a 1007 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1008
7daa0004 1009 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1010 if (t != NULL) {
1011 if (t->dev != dev) {
1012 err = -EEXIST;
1013 break;
1014 }
1015 } else {
6ed2533e 1016 unsigned nflags = 0;
1da177e4 1017
2941a486 1018 t = netdev_priv(dev);
1da177e4 1019
f97c1e0c 1020 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1021 nflags = IFF_BROADCAST;
1022 else if (p.iph.daddr)
1023 nflags = IFF_POINTOPOINT;
1024
1025 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1026 err = -EINVAL;
1027 break;
1028 }
f57e7d5a 1029 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1030 t->parms.iph.saddr = p.iph.saddr;
1031 t->parms.iph.daddr = p.iph.daddr;
1032 t->parms.i_key = p.i_key;
1033 t->parms.o_key = p.o_key;
1034 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1035 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1036 ipgre_tunnel_link(ign, t);
1da177e4
LT
1037 netdev_state_change(dev);
1038 }
1039 }
1040
1041 if (t) {
1042 err = 0;
1043 if (cmd == SIOCCHGTUNNEL) {
1044 t->parms.iph.ttl = p.iph.ttl;
1045 t->parms.iph.tos = p.iph.tos;
1046 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1047 if (t->parms.link != p.link) {
1048 t->parms.link = p.link;
42aa9162 1049 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1050 netdev_state_change(dev);
1051 }
1da177e4
LT
1052 }
1053 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1054 err = -EFAULT;
1055 } else
1056 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1057 break;
1058
1059 case SIOCDELTUNNEL:
1060 err = -EPERM;
1061 if (!capable(CAP_NET_ADMIN))
1062 goto done;
1063
7daa0004 1064 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1065 err = -EFAULT;
1066 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1067 goto done;
1068 err = -ENOENT;
f57e7d5a 1069 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1070 goto done;
1071 err = -EPERM;
7daa0004 1072 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1073 goto done;
1074 dev = t->dev;
1075 }
22f8cde5
SH
1076 unregister_netdevice(dev);
1077 err = 0;
1da177e4
LT
1078 break;
1079
1080 default:
1081 err = -EINVAL;
1082 }
1083
1084done:
1085 return err;
1086}
1087
1da177e4
LT
1088static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1089{
2941a486 1090 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1091 if (new_mtu < 68 ||
1092 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1093 return -EINVAL;
1094 dev->mtu = new_mtu;
1095 return 0;
1096}
1097
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1126
3b04ddde
SH
1127static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1128 unsigned short type,
1129 const void *daddr, const void *saddr, unsigned len)
1da177e4 1130{
2941a486 1131 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1132 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1133 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1134
1135 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1136 p[0] = t->parms.o_flags;
1137 p[1] = htons(type);
1138
1139 /*
e905a9ed 1140 * Set the source hardware address.
1da177e4 1141 */
e905a9ed 1142
1da177e4
LT
1143 if (saddr)
1144 memcpy(&iph->saddr, saddr, 4);
6d55cb91 1145 if (daddr)
1da177e4 1146 memcpy(&iph->daddr, daddr, 4);
6d55cb91 1147 if (iph->daddr)
1da177e4 1148 return t->hlen;
e905a9ed 1149
1da177e4
LT
1150 return -t->hlen;
1151}
1152
6a5f44d7
TT
1153static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1154{
6ed2533e 1155 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1156 memcpy(haddr, &iph->saddr, 4);
1157 return 4;
1158}
1159
3b04ddde
SH
1160static const struct header_ops ipgre_header_ops = {
1161 .create = ipgre_header,
6a5f44d7 1162 .parse = ipgre_header_parse,
3b04ddde
SH
1163};
1164
6a5f44d7 1165#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1166static int ipgre_open(struct net_device *dev)
1167{
2941a486 1168 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1169
f97c1e0c 1170 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1171 struct flowi fl = { .oif = t->parms.link,
1172 .nl_u = { .ip4_u =
1173 { .daddr = t->parms.iph.daddr,
1174 .saddr = t->parms.iph.saddr,
1175 .tos = RT_TOS(t->parms.iph.tos) } },
1176 .proto = IPPROTO_GRE };
1177 struct rtable *rt;
96635522 1178 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4 1179 return -EADDRNOTAVAIL;
d8d1f30b 1180 dev = rt->dst.dev;
1da177e4 1181 ip_rt_put(rt);
e5ed6399 1182 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1183 return -EADDRNOTAVAIL;
1184 t->mlink = dev->ifindex;
e5ed6399 1185 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1186 }
1187 return 0;
1188}
1189
1190static int ipgre_close(struct net_device *dev)
1191{
2941a486 1192 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1193
f97c1e0c 1194 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1195 struct in_device *in_dev;
c346dca1 1196 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1197 if (in_dev) {
1198 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1199 in_dev_put(in_dev);
1200 }
1201 }
1202 return 0;
1203}
1204
1205#endif
1206
b8c26a33
SH
1207static const struct net_device_ops ipgre_netdev_ops = {
1208 .ndo_init = ipgre_tunnel_init,
1209 .ndo_uninit = ipgre_tunnel_uninit,
1210#ifdef CONFIG_NET_IPGRE_BROADCAST
1211 .ndo_open = ipgre_open,
1212 .ndo_stop = ipgre_close,
1213#endif
1214 .ndo_start_xmit = ipgre_tunnel_xmit,
1215 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1216 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1217};
1218
1da177e4
LT
1219static void ipgre_tunnel_setup(struct net_device *dev)
1220{
b8c26a33 1221 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1222 dev->destructor = free_netdev;
1da177e4
LT
1223
1224 dev->type = ARPHRD_IPGRE;
c95b819a 1225 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1226 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1227 dev->flags = IFF_NOARP;
1228 dev->iflink = 0;
1229 dev->addr_len = 4;
0b67eceb 1230 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1231 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1232}
1233
1234static int ipgre_tunnel_init(struct net_device *dev)
1235{
1da177e4
LT
1236 struct ip_tunnel *tunnel;
1237 struct iphdr *iph;
1da177e4 1238
2941a486 1239 tunnel = netdev_priv(dev);
1da177e4
LT
1240 iph = &tunnel->parms.iph;
1241
1242 tunnel->dev = dev;
1243 strcpy(tunnel->parms.name, dev->name);
1244
1245 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1246 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1247
1da177e4 1248 if (iph->daddr) {
1da177e4 1249#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1250 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1251 if (!iph->saddr)
1252 return -EINVAL;
1253 dev->flags = IFF_BROADCAST;
3b04ddde 1254 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1255 }
1256#endif
ee34c1eb 1257 } else
6a5f44d7 1258 dev->header_ops = &ipgre_header_ops;
1da177e4 1259
1da177e4
LT
1260 return 0;
1261}
1262
b8c26a33 1263static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1264{
2941a486 1265 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1266 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1267 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1268
1269 tunnel->dev = dev;
1270 strcpy(tunnel->parms.name, dev->name);
1271
1272 iph->version = 4;
1273 iph->protocol = IPPROTO_GRE;
1274 iph->ihl = 5;
1275 tunnel->hlen = sizeof(struct iphdr) + 4;
1276
1277 dev_hold(dev);
eb8ce741 1278 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1279}
1280
1281
00959ade
DK
1282static const struct gre_protocol ipgre_protocol = {
1283 .handler = ipgre_rcv,
1284 .err_handler = ipgre_err,
1da177e4
LT
1285};
1286
eef6dd65 1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1288{
1289 int prio;
1290
1291 for (prio = 0; prio < 4; prio++) {
1292 int h;
1293 for (h = 0; h < HASH_SIZE; h++) {
eef6dd65
ED
1294 struct ip_tunnel *t = ign->tunnels[prio][h];
1295
1296 while (t != NULL) {
1297 unregister_netdevice_queue(t->dev, head);
1298 t = t->next;
1299 }
eb8ce741
PE
1300 }
1301 }
1302}
1303
2c8c1e72 1304static int __net_init ipgre_init_net(struct net *net)
59a4c759 1305{
cfb8fbf2 1306 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
59a4c759 1307 int err;
59a4c759 1308
7daa0004
PE
1309 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1310 ipgre_tunnel_setup);
1311 if (!ign->fb_tunnel_dev) {
1312 err = -ENOMEM;
1313 goto err_alloc_dev;
1314 }
be77e593 1315 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1316
b8c26a33 1317 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1318 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1319
1320 if ((err = register_netdev(ign->fb_tunnel_dev)))
1321 goto err_reg_dev;
1322
59a4c759
PE
1323 return 0;
1324
7daa0004
PE
1325err_reg_dev:
1326 free_netdev(ign->fb_tunnel_dev);
1327err_alloc_dev:
59a4c759
PE
1328 return err;
1329}
1330
2c8c1e72 1331static void __net_exit ipgre_exit_net(struct net *net)
59a4c759
PE
1332{
1333 struct ipgre_net *ign;
eef6dd65 1334 LIST_HEAD(list);
59a4c759
PE
1335
1336 ign = net_generic(net, ipgre_net_id);
7daa0004 1337 rtnl_lock();
eef6dd65
ED
1338 ipgre_destroy_tunnels(ign, &list);
1339 unregister_netdevice_many(&list);
7daa0004 1340 rtnl_unlock();
59a4c759
PE
1341}
1342
1343static struct pernet_operations ipgre_net_ops = {
1344 .init = ipgre_init_net,
1345 .exit = ipgre_exit_net,
cfb8fbf2
EB
1346 .id = &ipgre_net_id,
1347 .size = sizeof(struct ipgre_net),
59a4c759 1348};
1da177e4 1349
c19e654d
HX
1350static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1351{
1352 __be16 flags;
1353
1354 if (!data)
1355 return 0;
1356
1357 flags = 0;
1358 if (data[IFLA_GRE_IFLAGS])
1359 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1360 if (data[IFLA_GRE_OFLAGS])
1361 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1362 if (flags & (GRE_VERSION|GRE_ROUTING))
1363 return -EINVAL;
1364
1365 return 0;
1366}
1367
e1a80002
HX
1368static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1369{
1370 __be32 daddr;
1371
1372 if (tb[IFLA_ADDRESS]) {
1373 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1374 return -EINVAL;
1375 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1376 return -EADDRNOTAVAIL;
1377 }
1378
1379 if (!data)
1380 goto out;
1381
1382 if (data[IFLA_GRE_REMOTE]) {
1383 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1384 if (!daddr)
1385 return -EINVAL;
1386 }
1387
1388out:
1389 return ipgre_tunnel_validate(tb, data);
1390}
1391
c19e654d
HX
1392static void ipgre_netlink_parms(struct nlattr *data[],
1393 struct ip_tunnel_parm *parms)
1394{
7bb82d92 1395 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1396
1397 parms->iph.protocol = IPPROTO_GRE;
1398
1399 if (!data)
1400 return;
1401
1402 if (data[IFLA_GRE_LINK])
1403 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1404
1405 if (data[IFLA_GRE_IFLAGS])
1406 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1407
1408 if (data[IFLA_GRE_OFLAGS])
1409 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1410
1411 if (data[IFLA_GRE_IKEY])
1412 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1413
1414 if (data[IFLA_GRE_OKEY])
1415 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1416
1417 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1418 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1419
1420 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1421 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1422
1423 if (data[IFLA_GRE_TTL])
1424 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1425
1426 if (data[IFLA_GRE_TOS])
1427 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1428
1429 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1430 parms->iph.frag_off = htons(IP_DF);
1431}
1432
e1a80002
HX
1433static int ipgre_tap_init(struct net_device *dev)
1434{
1435 struct ip_tunnel *tunnel;
1436
1437 tunnel = netdev_priv(dev);
1438
1439 tunnel->dev = dev;
1440 strcpy(tunnel->parms.name, dev->name);
1441
1442 ipgre_tunnel_bind_dev(dev);
1443
1444 return 0;
1445}
1446
b8c26a33
SH
1447static const struct net_device_ops ipgre_tap_netdev_ops = {
1448 .ndo_init = ipgre_tap_init,
1449 .ndo_uninit = ipgre_tunnel_uninit,
1450 .ndo_start_xmit = ipgre_tunnel_xmit,
1451 .ndo_set_mac_address = eth_mac_addr,
1452 .ndo_validate_addr = eth_validate_addr,
1453 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1454};
1455
e1a80002
HX
1456static void ipgre_tap_setup(struct net_device *dev)
1457{
1458
1459 ether_setup(dev);
1460
2e9526b3 1461 dev->netdev_ops = &ipgre_tap_netdev_ops;
e1a80002 1462 dev->destructor = free_netdev;
e1a80002
HX
1463
1464 dev->iflink = 0;
1465 dev->features |= NETIF_F_NETNS_LOCAL;
1466}
1467
81adee47 1468static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1469 struct nlattr *data[])
1470{
1471 struct ip_tunnel *nt;
1472 struct net *net = dev_net(dev);
1473 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1474 int mtu;
1475 int err;
1476
1477 nt = netdev_priv(dev);
1478 ipgre_netlink_parms(data, &nt->parms);
1479
e1a80002 1480 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1481 return -EEXIST;
1482
e1a80002
HX
1483 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1484 random_ether_addr(dev->dev_addr);
1485
c19e654d
HX
1486 mtu = ipgre_tunnel_bind_dev(dev);
1487 if (!tb[IFLA_MTU])
1488 dev->mtu = mtu;
1489
1490 err = register_netdevice(dev);
1491 if (err)
1492 goto out;
1493
1494 dev_hold(dev);
1495 ipgre_tunnel_link(ign, nt);
1496
1497out:
1498 return err;
1499}
1500
1501static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1502 struct nlattr *data[])
1503{
1504 struct ip_tunnel *t, *nt;
1505 struct net *net = dev_net(dev);
1506 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1507 struct ip_tunnel_parm p;
1508 int mtu;
1509
1510 if (dev == ign->fb_tunnel_dev)
1511 return -EINVAL;
1512
1513 nt = netdev_priv(dev);
1514 ipgre_netlink_parms(data, &p);
1515
1516 t = ipgre_tunnel_locate(net, &p, 0);
1517
1518 if (t) {
1519 if (t->dev != dev)
1520 return -EEXIST;
1521 } else {
c19e654d
HX
1522 t = nt;
1523
2e9526b3
HX
1524 if (dev->type != ARPHRD_ETHER) {
1525 unsigned nflags = 0;
c19e654d 1526
2e9526b3
HX
1527 if (ipv4_is_multicast(p.iph.daddr))
1528 nflags = IFF_BROADCAST;
1529 else if (p.iph.daddr)
1530 nflags = IFF_POINTOPOINT;
1531
1532 if ((dev->flags ^ nflags) &
1533 (IFF_POINTOPOINT | IFF_BROADCAST))
1534 return -EINVAL;
1535 }
c19e654d
HX
1536
1537 ipgre_tunnel_unlink(ign, t);
1538 t->parms.iph.saddr = p.iph.saddr;
1539 t->parms.iph.daddr = p.iph.daddr;
1540 t->parms.i_key = p.i_key;
2e9526b3
HX
1541 if (dev->type != ARPHRD_ETHER) {
1542 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1543 memcpy(dev->broadcast, &p.iph.daddr, 4);
1544 }
c19e654d
HX
1545 ipgre_tunnel_link(ign, t);
1546 netdev_state_change(dev);
1547 }
1548
1549 t->parms.o_key = p.o_key;
1550 t->parms.iph.ttl = p.iph.ttl;
1551 t->parms.iph.tos = p.iph.tos;
1552 t->parms.iph.frag_off = p.iph.frag_off;
1553
1554 if (t->parms.link != p.link) {
1555 t->parms.link = p.link;
1556 mtu = ipgre_tunnel_bind_dev(dev);
1557 if (!tb[IFLA_MTU])
1558 dev->mtu = mtu;
1559 netdev_state_change(dev);
1560 }
1561
1562 return 0;
1563}
1564
/*
 * rtnl_link_ops->get_size: sum of nla_total_size() over the payload sizes
 * of the ten attributes emitted by ipgre_fill_info().
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	size_t size = 0;

	size += nla_total_size(4);	/* IFLA_GRE_LINK */
	size += nla_total_size(2);	/* IFLA_GRE_IFLAGS */
	size += nla_total_size(2);	/* IFLA_GRE_OFLAGS */
	size += nla_total_size(4);	/* IFLA_GRE_IKEY */
	size += nla_total_size(4);	/* IFLA_GRE_OKEY */
	size += nla_total_size(4);	/* IFLA_GRE_LOCAL */
	size += nla_total_size(4);	/* IFLA_GRE_REMOTE */
	size += nla_total_size(1);	/* IFLA_GRE_TTL */
	size += nla_total_size(1);	/* IFLA_GRE_TOS */
	size += nla_total_size(1);	/* IFLA_GRE_PMTUDISC */

	return size;
}
1590
1591static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1592{
1593 struct ip_tunnel *t = netdev_priv(dev);
1594 struct ip_tunnel_parm *p = &t->parms;
1595
1596 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1597 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1598 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1599 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1600 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1601 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1602 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1603 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1604 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1605 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1606
1607 return 0;
1608
1609nla_put_failure:
1610 return -EMSGSIZE;
1611}
1612
1613static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1614 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1615 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1616 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1617 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1618 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1619 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1620 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1621 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1622 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1623 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1624};
1625
1626static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1627 .kind = "gre",
1628 .maxtype = IFLA_GRE_MAX,
1629 .policy = ipgre_policy,
1630 .priv_size = sizeof(struct ip_tunnel),
1631 .setup = ipgre_tunnel_setup,
1632 .validate = ipgre_tunnel_validate,
1633 .newlink = ipgre_newlink,
1634 .changelink = ipgre_changelink,
1635 .get_size = ipgre_get_size,
1636 .fill_info = ipgre_fill_info,
1637};
1638
e1a80002
HX
1639static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1640 .kind = "gretap",
1641 .maxtype = IFLA_GRE_MAX,
1642 .policy = ipgre_policy,
1643 .priv_size = sizeof(struct ip_tunnel),
1644 .setup = ipgre_tap_setup,
1645 .validate = ipgre_tap_validate,
1646 .newlink = ipgre_newlink,
1647 .changelink = ipgre_changelink,
1648 .get_size = ipgre_get_size,
1649 .fill_info = ipgre_fill_info,
1650};
1651
1da177e4
LT
1652/*
1653 * And now the modules code and kernel interface.
1654 */
1655
1656static int __init ipgre_init(void)
1657{
1658 int err;
1659
1660 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1661
cfb8fbf2 1662 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1663 if (err < 0)
c2892f02
AD
1664 return err;
1665
00959ade 1666 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02
AD
1667 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed;
1670 }
7daa0004 1671
c19e654d
HX
1672 err = rtnl_link_register(&ipgre_link_ops);
1673 if (err < 0)
1674 goto rtnl_link_failed;
1675
e1a80002
HX
1676 err = rtnl_link_register(&ipgre_tap_ops);
1677 if (err < 0)
1678 goto tap_ops_failed;
1679
c19e654d 1680out:
1da177e4 1681 return err;
c19e654d 1682
e1a80002
HX
1683tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1685rtnl_link_failed:
00959ade 1686 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02
AD
1687add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops);
c19e654d 1689 goto out;
1da177e4
LT
1690}
1691
db44575f 1692static void __exit ipgre_fini(void)
1da177e4 1693{
e1a80002 1694 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1695 rtnl_link_unregister(&ipgre_link_ops);
00959ade 1696 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1da177e4 1697 printk(KERN_INFO "ipgre close: can't remove protocol\n");
c2892f02 1698 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1699}
1700
1701module_init(ipgre_init);
1702module_exit(ipgre_fini);
1703MODULE_LICENSE("GPL");
4d74f8ba
PM
1704MODULE_ALIAS_RTNL_LINK("gre");
1705MODULE_ALIAS_RTNL_LINK("gretap");