]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
if_tunnel.h: add missing ams/byteorder.h include
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
a43912ab 69 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
70
71
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all.
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
107
108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
115
116 Alexey Kuznetsov.
117 */
118
c19e654d 119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 122static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
123
124/* Fallback tunnel: no source, no destination, no key, no options */
125
eb8ce741
PE
126#define HASH_SIZE 16
127
f99189b1 128static int ipgre_net_id __read_mostly;
59a4c759 129struct ipgre_net {
eb8ce741
PE
130 struct ip_tunnel *tunnels[4][HASH_SIZE];
131
7daa0004 132 struct net_device *fb_tunnel_dev;
59a4c759
PE
133};
134
1da177e4
LT
135/* Tunnel hash table */
136
137/*
138 4 hash tables:
139
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
144
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
148
149 All keyless packets, if not matched by configured keyless tunnels,
150 will match fallback tunnel.
151 */
152
/*
 * Fold a 32-bit value (address or key) into a 4-bit hash bucket index.
 *
 * The argument is fully parenthesised so that an expression argument
 * (e.g. HASH(a | b)) is cast and shifted as a whole.  Note that the
 * argument is still evaluated twice; do not pass expressions with
 * side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
1da177e4 154
eb8ce741
PE
/* Named views of the four hash tables held in struct ipgre_net. */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */

/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

/*
 * Walk one hash chain using rcu_dereference().  The iterator is a
 * variable named 't' that must already exist in the calling scope.
 */
#define for_each_ip_tunnel_rcu(start)			\
	for (t = rcu_dereference(start);		\
	     t;						\
	     t = rcu_dereference(t->next))
1da177e4
LT
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
749c10f9 169static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
1da177e4 172{
749c10f9
TT
173 struct net *net = dev_net(dev);
174 int link = dev->ifindex;
1da177e4
LT
175 unsigned h0 = HASH(remote);
176 unsigned h1 = HASH(key);
afcf1242 177 struct ip_tunnel *t, *cand = NULL;
7daa0004 178 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
179 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
180 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 181 int score, cand_score = 4;
1da177e4 182
8d5b2c08 183 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
184 if (local != t->parms.iph.saddr ||
185 remote != t->parms.iph.daddr ||
186 key != t->parms.i_key ||
187 !(t->dev->flags & IFF_UP))
188 continue;
189
190 if (t->dev->type != ARPHRD_IPGRE &&
191 t->dev->type != dev_type)
192 continue;
193
afcf1242 194 score = 0;
749c10f9 195 if (t->parms.link != link)
afcf1242 196 score |= 1;
749c10f9 197 if (t->dev->type != dev_type)
afcf1242
TT
198 score |= 2;
199 if (score == 0)
749c10f9 200 return t;
afcf1242
TT
201
202 if (score < cand_score) {
203 cand = t;
204 cand_score = score;
205 }
1da177e4 206 }
e1a80002 207
8d5b2c08 208 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9
TT
209 if (remote != t->parms.iph.daddr ||
210 key != t->parms.i_key ||
211 !(t->dev->flags & IFF_UP))
212 continue;
213
214 if (t->dev->type != ARPHRD_IPGRE &&
215 t->dev->type != dev_type)
216 continue;
217
afcf1242 218 score = 0;
749c10f9 219 if (t->parms.link != link)
afcf1242 220 score |= 1;
749c10f9 221 if (t->dev->type != dev_type)
afcf1242
TT
222 score |= 2;
223 if (score == 0)
749c10f9 224 return t;
afcf1242
TT
225
226 if (score < cand_score) {
227 cand = t;
228 cand_score = score;
229 }
1da177e4 230 }
e1a80002 231
8d5b2c08 232 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
233 if ((local != t->parms.iph.saddr &&
234 (local != t->parms.iph.daddr ||
235 !ipv4_is_multicast(local))) ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
238 continue;
239
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
242 continue;
243
afcf1242 244 score = 0;
749c10f9 245 if (t->parms.link != link)
afcf1242 246 score |= 1;
749c10f9 247 if (t->dev->type != dev_type)
afcf1242
TT
248 score |= 2;
249 if (score == 0)
749c10f9 250 return t;
afcf1242
TT
251
252 if (score < cand_score) {
253 cand = t;
254 cand_score = score;
255 }
1da177e4 256 }
e1a80002 257
8d5b2c08 258 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
259 if (t->parms.i_key != key ||
260 !(t->dev->flags & IFF_UP))
261 continue;
262
263 if (t->dev->type != ARPHRD_IPGRE &&
264 t->dev->type != dev_type)
265 continue;
266
afcf1242 267 score = 0;
749c10f9 268 if (t->parms.link != link)
afcf1242 269 score |= 1;
749c10f9 270 if (t->dev->type != dev_type)
afcf1242
TT
271 score |= 2;
272 if (score == 0)
749c10f9 273 return t;
afcf1242
TT
274
275 if (score < cand_score) {
276 cand = t;
277 cand_score = score;
278 }
1da177e4
LT
279 }
280
afcf1242
TT
281 if (cand != NULL)
282 return cand;
e1a80002 283
8d5b2c08
ED
284 dev = ign->fb_tunnel_dev;
285 if (dev->flags & IFF_UP)
286 return netdev_priv(dev);
749c10f9 287
1da177e4
LT
288 return NULL;
289}
290
f57e7d5a
PE
291static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
292 struct ip_tunnel_parm *parms)
1da177e4 293{
5056a1ef
YH
294 __be32 remote = parms->iph.daddr;
295 __be32 local = parms->iph.saddr;
296 __be32 key = parms->i_key;
1da177e4
LT
297 unsigned h = HASH(key);
298 int prio = 0;
299
300 if (local)
301 prio |= 1;
f97c1e0c 302 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
303 prio |= 2;
304 h ^= HASH(remote);
305 }
306
eb8ce741 307 return &ign->tunnels[prio][h];
1da177e4
LT
308}
309
f57e7d5a
PE
310static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
311 struct ip_tunnel *t)
5056a1ef 312{
f57e7d5a 313 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
314}
315
f57e7d5a 316static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 317{
f57e7d5a 318 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4 319
8d5b2c08 320 spin_lock_bh(&ipgre_lock);
1da177e4 321 t->next = *tp;
8d5b2c08
ED
322 rcu_assign_pointer(*tp, t);
323 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
324}
325
f57e7d5a 326static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
327{
328 struct ip_tunnel **tp;
329
f57e7d5a 330 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4 331 if (t == *tp) {
8d5b2c08 332 spin_lock_bh(&ipgre_lock);
1da177e4 333 *tp = t->next;
8d5b2c08 334 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
335 break;
336 }
337 }
338}
339
e1a80002
HX
340static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341 struct ip_tunnel_parm *parms,
342 int type)
1da177e4 343{
d5a0a1e3
AV
344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
749c10f9 347 int link = parms->link;
e1a80002
HX
348 struct ip_tunnel *t, **tp;
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350
351 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
352 if (local == t->parms.iph.saddr &&
353 remote == t->parms.iph.daddr &&
354 key == t->parms.i_key &&
749c10f9 355 link == t->parms.link &&
e1a80002
HX
356 type == t->dev->type)
357 break;
358
359 return t;
360}
361
362static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
363 struct ip_tunnel_parm *parms, int create)
364{
365 struct ip_tunnel *t, *nt;
1da177e4 366 struct net_device *dev;
1da177e4 367 char name[IFNAMSIZ];
f57e7d5a 368 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 369
e1a80002
HX
370 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
371 if (t || !create)
372 return t;
1da177e4
LT
373
374 if (parms->name[0])
375 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
376 else
377 sprintf(name, "gre%%d");
1da177e4
LT
378
379 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
380 if (!dev)
381 return NULL;
382
0b67eceb
PE
383 dev_net_set(dev, net);
384
b37d428b
PE
385 if (strchr(name, '%')) {
386 if (dev_alloc_name(dev, name) < 0)
387 goto failed_free;
388 }
389
2941a486 390 nt = netdev_priv(dev);
1da177e4 391 nt->parms = *parms;
c19e654d 392 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 393
42aa9162
HX
394 dev->mtu = ipgre_tunnel_bind_dev(dev);
395
b37d428b
PE
396 if (register_netdevice(dev) < 0)
397 goto failed_free;
1da177e4 398
1da177e4 399 dev_hold(dev);
f57e7d5a 400 ipgre_tunnel_link(ign, nt);
1da177e4
LT
401 return nt;
402
b37d428b
PE
403failed_free:
404 free_netdev(dev);
1da177e4
LT
405 return NULL;
406}
407
408static void ipgre_tunnel_uninit(struct net_device *dev)
409{
f57e7d5a
PE
410 struct net *net = dev_net(dev);
411 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
412
413 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
414 dev_put(dev);
415}
416
417
418static void ipgre_err(struct sk_buff *skb, u32 info)
419{
1da177e4 420
071f92d0 421/* All the routers (except for Linux) return only
1da177e4
LT
422 8 bytes of packet payload. It means, that precise relaying of
423 ICMP in the real Internet is absolutely infeasible.
424
425 Moreover, Cisco "wise men" put GRE key to the third word
426 in GRE header. It makes impossible maintaining even soft state for keyed
427 GRE tunnels with enabled checksum. Tell them "thank you".
428
429 Well, I wonder, rfc1812 was written by Cisco employee,
430 what the hell these idiots break standrads established
431 by themself???
432 */
433
6ed2533e 434 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 435 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 436 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
437 const int type = icmp_hdr(skb)->type;
438 const int code = icmp_hdr(skb)->code;
1da177e4 439 struct ip_tunnel *t;
d5a0a1e3 440 __be16 flags;
1da177e4
LT
441
442 flags = p[0];
443 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
444 if (flags&(GRE_VERSION|GRE_ROUTING))
445 return;
446 if (flags&GRE_KEY) {
447 grehlen += 4;
448 if (flags&GRE_CSUM)
449 grehlen += 4;
450 }
451 }
452
453 /* If only 8 bytes returned, keyed message will be dropped here */
454 if (skb_headlen(skb) < grehlen)
455 return;
456
457 switch (type) {
458 default:
459 case ICMP_PARAMETERPROB:
460 return;
461
462 case ICMP_DEST_UNREACH:
463 switch (code) {
464 case ICMP_SR_FAILED:
465 case ICMP_PORT_UNREACH:
466 /* Impossible event. */
467 return;
468 case ICMP_FRAG_NEEDED:
469 /* Soft state for pmtu is maintained by IP core. */
470 return;
471 default:
472 /* All others are translated to HOST_UNREACH.
473 rfc2003 contains "deep thoughts" about NET_UNREACH,
474 I believe they are just ether pollution. --ANK
475 */
476 break;
477 }
478 break;
479 case ICMP_TIME_EXCEEDED:
480 if (code != ICMP_EXC_TTL)
481 return;
482 break;
483 }
484
8d5b2c08 485 rcu_read_lock();
749c10f9 486 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
487 flags & GRE_KEY ?
488 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
489 p[1]);
f97c1e0c
JP
490 if (t == NULL || t->parms.iph.daddr == 0 ||
491 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
492 goto out;
493
494 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
495 goto out;
496
da6185d8 497 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
498 t->err_count++;
499 else
500 t->err_count = 1;
501 t->err_time = jiffies;
502out:
8d5b2c08 503 rcu_read_unlock();
1da177e4 504 return;
1da177e4
LT
505}
506
507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
508{
509 if (INET_ECN_is_ce(iph->tos)) {
510 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 511 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 512 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 513 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
514 }
515 }
516}
517
518static inline u8
519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
520{
521 u8 inner = 0;
522 if (skb->protocol == htons(ETH_P_IP))
523 inner = old_iph->tos;
524 else if (skb->protocol == htons(ETH_P_IPV6))
525 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
526 return INET_ECN_encapsulate(tos, inner);
527}
528
529static int ipgre_rcv(struct sk_buff *skb)
530{
531 struct iphdr *iph;
532 u8 *h;
d5a0a1e3 533 __be16 flags;
d3bc23e7 534 __sum16 csum = 0;
d5a0a1e3 535 __be32 key = 0;
1da177e4
LT
536 u32 seqno = 0;
537 struct ip_tunnel *tunnel;
538 int offset = 4;
e1a80002 539 __be16 gre_proto;
64194c31 540 unsigned int len;
1da177e4
LT
541
542 if (!pskb_may_pull(skb, 16))
543 goto drop_nolock;
544
eddc9ec5 545 iph = ip_hdr(skb);
1da177e4 546 h = skb->data;
d5a0a1e3 547 flags = *(__be16*)h;
1da177e4
LT
548
549 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
550 /* - Version must be 0.
551 - We do not support routing headers.
552 */
553 if (flags&(GRE_VERSION|GRE_ROUTING))
554 goto drop_nolock;
555
556 if (flags&GRE_CSUM) {
fb286bb2 557 switch (skb->ip_summed) {
84fa7933 558 case CHECKSUM_COMPLETE:
d3bc23e7 559 csum = csum_fold(skb->csum);
fb286bb2
HX
560 if (!csum)
561 break;
562 /* fall through */
563 case CHECKSUM_NONE:
564 skb->csum = 0;
565 csum = __skb_checksum_complete(skb);
84fa7933 566 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
567 }
568 offset += 4;
569 }
570 if (flags&GRE_KEY) {
d5a0a1e3 571 key = *(__be32*)(h + offset);
1da177e4
LT
572 offset += 4;
573 }
574 if (flags&GRE_SEQ) {
d5a0a1e3 575 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
576 offset += 4;
577 }
578 }
579
e1a80002
HX
580 gre_proto = *(__be16 *)(h + 2);
581
8d5b2c08 582 rcu_read_lock();
749c10f9 583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
584 iph->saddr, iph->daddr, key,
585 gre_proto))) {
addd68eb
PE
586 struct net_device_stats *stats = &tunnel->dev->stats;
587
1da177e4
LT
588 secpath_reset(skb);
589
e1a80002 590 skb->protocol = gre_proto;
1da177e4
LT
591 /* WCCP version 1 and 2 protocol decoding.
592 * - Change protocol to IP
593 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
594 */
e1a80002 595 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 596 skb->protocol = htons(ETH_P_IP);
e905a9ed 597 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
598 offset += 4;
599 }
600
1d069167 601 skb->mac_header = skb->network_header;
4209fb60 602 __pskb_pull(skb, offset);
9c70220b 603 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
604 skb->pkt_type = PACKET_HOST;
605#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 606 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 607 /* Looped back packet, drop it! */
511c3f92 608 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 609 goto drop;
addd68eb 610 stats->multicast++;
1da177e4
LT
611 skb->pkt_type = PACKET_BROADCAST;
612 }
613#endif
614
615 if (((flags&GRE_CSUM) && csum) ||
616 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
617 stats->rx_crc_errors++;
618 stats->rx_errors++;
1da177e4
LT
619 goto drop;
620 }
621 if (tunnel->parms.i_flags&GRE_SEQ) {
622 if (!(flags&GRE_SEQ) ||
623 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
624 stats->rx_fifo_errors++;
625 stats->rx_errors++;
1da177e4
LT
626 goto drop;
627 }
628 tunnel->i_seqno = seqno + 1;
629 }
e1a80002 630
64194c31
HX
631 len = skb->len;
632
e1a80002
HX
633 /* Warning: All skb pointers will be invalidated! */
634 if (tunnel->dev->type == ARPHRD_ETHER) {
635 if (!pskb_may_pull(skb, ETH_HLEN)) {
636 stats->rx_length_errors++;
637 stats->rx_errors++;
638 goto drop;
639 }
640
641 iph = ip_hdr(skb);
642 skb->protocol = eth_type_trans(skb, tunnel->dev);
643 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
644 }
645
addd68eb 646 stats->rx_packets++;
64194c31 647 stats->rx_bytes += len;
1da177e4 648 skb->dev = tunnel->dev;
adf30907 649 skb_dst_drop(skb);
1da177e4 650 nf_reset(skb);
e1a80002
HX
651
652 skb_reset_network_header(skb);
1da177e4 653 ipgre_ecn_decapsulate(iph, skb);
e1a80002 654
1da177e4 655 netif_rx(skb);
8d5b2c08 656 rcu_read_unlock();
1da177e4
LT
657 return(0);
658 }
45af08be 659 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
660
661drop:
8d5b2c08 662 rcu_read_unlock();
1da177e4
LT
663drop_nolock:
664 kfree_skb(skb);
665 return(0);
666}
667
6fef4c0c 668static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 669{
2941a486 670 struct ip_tunnel *tunnel = netdev_priv(dev);
0bfbedb1
ED
671 struct net_device_stats *stats = &dev->stats;
672 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
eddc9ec5 673 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
674 struct iphdr *tiph;
675 u8 tos;
d5a0a1e3 676 __be16 df;
1da177e4
LT
677 struct rtable *rt; /* Route to the other host */
678 struct net_device *tdev; /* Device to other host */
679 struct iphdr *iph; /* Our new IP header */
c2636b4d 680 unsigned int max_headroom; /* The extra header space needed */
1da177e4 681 int gre_hlen;
d5a0a1e3 682 __be32 dst;
1da177e4
LT
683 int mtu;
684
e1a80002
HX
685 if (dev->type == ARPHRD_ETHER)
686 IPCB(skb)->flags = 0;
687
688 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 689 gre_hlen = 0;
6ed2533e 690 tiph = (struct iphdr *)skb->data;
1da177e4
LT
691 } else {
692 gre_hlen = tunnel->hlen;
693 tiph = &tunnel->parms.iph;
694 }
695
696 if ((dst = tiph->daddr) == 0) {
697 /* NBMA tunnel */
698
adf30907 699 if (skb_dst(skb) == NULL) {
addd68eb 700 stats->tx_fifo_errors++;
1da177e4
LT
701 goto tx_error;
702 }
703
704 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 705 rt = skb_rtable(skb);
1da177e4
LT
706 if ((dst = rt->rt_gateway) == 0)
707 goto tx_error_icmp;
708 }
709#ifdef CONFIG_IPV6
710 else if (skb->protocol == htons(ETH_P_IPV6)) {
711 struct in6_addr *addr6;
712 int addr_type;
adf30907 713 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
714
715 if (neigh == NULL)
716 goto tx_error;
717
6ed2533e 718 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
719 addr_type = ipv6_addr_type(addr6);
720
721 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 722 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
723 addr_type = ipv6_addr_type(addr6);
724 }
725
726 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
727 goto tx_error_icmp;
728
729 dst = addr6->s6_addr32[3];
730 }
731#endif
732 else
733 goto tx_error;
734 }
735
736 tos = tiph->tos;
ee686ca9
AJ
737 if (tos == 1) {
738 tos = 0;
1da177e4
LT
739 if (skb->protocol == htons(ETH_P_IP))
740 tos = old_iph->tos;
1da177e4
LT
741 }
742
743 {
744 struct flowi fl = { .oif = tunnel->parms.link,
745 .nl_u = { .ip4_u =
746 { .daddr = dst,
747 .saddr = tiph->saddr,
748 .tos = RT_TOS(tos) } },
749 .proto = IPPROTO_GRE };
96635522 750 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 751 stats->tx_carrier_errors++;
1da177e4
LT
752 goto tx_error;
753 }
754 }
755 tdev = rt->u.dst.dev;
756
757 if (tdev == dev) {
758 ip_rt_put(rt);
addd68eb 759 stats->collisions++;
1da177e4
LT
760 goto tx_error;
761 }
762
763 df = tiph->frag_off;
764 if (df)
c95b819a 765 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 766 else
adf30907 767 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 768
adf30907
ED
769 if (skb_dst(skb))
770 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
771
772 if (skb->protocol == htons(ETH_P_IP)) {
773 df |= (old_iph->frag_off&htons(IP_DF));
774
775 if ((old_iph->frag_off&htons(IP_DF)) &&
776 mtu < ntohs(old_iph->tot_len)) {
777 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
778 ip_rt_put(rt);
779 goto tx_error;
780 }
781 }
782#ifdef CONFIG_IPV6
783 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 784 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 785
adf30907 786 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
787 if ((tunnel->parms.iph.daddr &&
788 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
789 rt6->rt6i_dst.plen == 128) {
790 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 791 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
792 }
793 }
794
795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
3ffe533c 796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1da177e4
LT
797 ip_rt_put(rt);
798 goto tx_error;
799 }
800 }
801#endif
802
803 if (tunnel->err_count > 0) {
da6185d8
WY
804 if (time_before(jiffies,
805 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
806 tunnel->err_count--;
807
808 dst_link_failure(skb);
809 } else
810 tunnel->err_count = 0;
811 }
812
813 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
814
cfbba49d
PM
815 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
818 if (!new_skb) {
819 ip_rt_put(rt);
0bfbedb1 820 txq->tx_dropped++;
1da177e4 821 dev_kfree_skb(skb);
6ed10654 822 return NETDEV_TX_OK;
1da177e4
LT
823 }
824 if (skb->sk)
825 skb_set_owner_w(new_skb, skb->sk);
826 dev_kfree_skb(skb);
827 skb = new_skb;
eddc9ec5 828 old_iph = ip_hdr(skb);
1da177e4
LT
829 }
830
64194c31 831 skb_reset_transport_header(skb);
e2d1bca7
ACM
832 skb_push(skb, gre_hlen);
833 skb_reset_network_header(skb);
1da177e4 834 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
835 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
836 IPSKB_REROUTED);
adf30907
ED
837 skb_dst_drop(skb);
838 skb_dst_set(skb, &rt->u.dst);
1da177e4
LT
839
840 /*
841 * Push down and install the IPIP header.
842 */
843
eddc9ec5 844 iph = ip_hdr(skb);
1da177e4
LT
845 iph->version = 4;
846 iph->ihl = sizeof(struct iphdr) >> 2;
847 iph->frag_off = df;
848 iph->protocol = IPPROTO_GRE;
849 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
850 iph->daddr = rt->rt_dst;
851 iph->saddr = rt->rt_src;
852
853 if ((iph->ttl = tiph->ttl) == 0) {
854 if (skb->protocol == htons(ETH_P_IP))
855 iph->ttl = old_iph->ttl;
856#ifdef CONFIG_IPV6
857 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 858 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
859#endif
860 else
861 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
862 }
863
e1a80002
HX
864 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
865 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
866 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
867
868 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 869 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
870
871 if (tunnel->parms.o_flags&GRE_SEQ) {
872 ++tunnel->o_seqno;
873 *ptr = htonl(tunnel->o_seqno);
874 ptr--;
875 }
876 if (tunnel->parms.o_flags&GRE_KEY) {
877 *ptr = tunnel->parms.o_key;
878 ptr--;
879 }
880 if (tunnel->parms.o_flags&GRE_CSUM) {
881 *ptr = 0;
5f92a738 882 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
883 }
884 }
885
886 nf_reset(skb);
887
888 IPTUNNEL_XMIT();
6ed10654 889 return NETDEV_TX_OK;
1da177e4
LT
890
891tx_error_icmp:
892 dst_link_failure(skb);
893
894tx_error:
895 stats->tx_errors++;
896 dev_kfree_skb(skb);
6ed10654 897 return NETDEV_TX_OK;
1da177e4
LT
898}
899
42aa9162 900static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
901{
902 struct net_device *tdev = NULL;
903 struct ip_tunnel *tunnel;
904 struct iphdr *iph;
905 int hlen = LL_MAX_HEADER;
906 int mtu = ETH_DATA_LEN;
907 int addend = sizeof(struct iphdr) + 4;
908
909 tunnel = netdev_priv(dev);
910 iph = &tunnel->parms.iph;
911
c95b819a 912 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
913
914 if (iph->daddr) {
915 struct flowi fl = { .oif = tunnel->parms.link,
916 .nl_u = { .ip4_u =
917 { .daddr = iph->daddr,
918 .saddr = iph->saddr,
919 .tos = RT_TOS(iph->tos) } },
920 .proto = IPPROTO_GRE };
921 struct rtable *rt;
96635522 922 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
923 tdev = rt->u.dst.dev;
924 ip_rt_put(rt);
925 }
e1a80002
HX
926
927 if (dev->type != ARPHRD_ETHER)
928 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
929 }
930
931 if (!tdev && tunnel->parms.link)
96635522 932 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
933
934 if (tdev) {
c95b819a 935 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
936 mtu = tdev->mtu;
937 }
938 dev->iflink = tunnel->parms.link;
939
940 /* Precalculate GRE options length */
941 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
942 if (tunnel->parms.o_flags&GRE_CSUM)
943 addend += 4;
944 if (tunnel->parms.o_flags&GRE_KEY)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_SEQ)
947 addend += 4;
948 }
c95b819a 949 dev->needed_headroom = addend + hlen;
8cdb0456 950 mtu -= dev->hard_header_len + addend;
42aa9162
HX
951
952 if (mtu < 68)
953 mtu = 68;
954
ee34c1eb
MS
955 tunnel->hlen = addend;
956
42aa9162 957 return mtu;
ee34c1eb
MS
958}
959
1da177e4
LT
960static int
961ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
962{
963 int err = 0;
964 struct ip_tunnel_parm p;
965 struct ip_tunnel *t;
f57e7d5a
PE
966 struct net *net = dev_net(dev);
967 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
968
969 switch (cmd) {
970 case SIOCGETTUNNEL:
971 t = NULL;
7daa0004 972 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
973 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
974 err = -EFAULT;
975 break;
976 }
f57e7d5a 977 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
978 }
979 if (t == NULL)
2941a486 980 t = netdev_priv(dev);
1da177e4
LT
981 memcpy(&p, &t->parms, sizeof(p));
982 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
983 err = -EFAULT;
984 break;
985
986 case SIOCADDTUNNEL:
987 case SIOCCHGTUNNEL:
988 err = -EPERM;
989 if (!capable(CAP_NET_ADMIN))
990 goto done;
991
992 err = -EFAULT;
993 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
994 goto done;
995
996 err = -EINVAL;
997 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
998 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
999 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1000 goto done;
1001 if (p.iph.ttl)
1002 p.iph.frag_off |= htons(IP_DF);
1003
1004 if (!(p.i_flags&GRE_KEY))
1005 p.i_key = 0;
1006 if (!(p.o_flags&GRE_KEY))
1007 p.o_key = 0;
1008
f57e7d5a 1009 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1010
7daa0004 1011 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1012 if (t != NULL) {
1013 if (t->dev != dev) {
1014 err = -EEXIST;
1015 break;
1016 }
1017 } else {
6ed2533e 1018 unsigned nflags = 0;
1da177e4 1019
2941a486 1020 t = netdev_priv(dev);
1da177e4 1021
f97c1e0c 1022 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1023 nflags = IFF_BROADCAST;
1024 else if (p.iph.daddr)
1025 nflags = IFF_POINTOPOINT;
1026
1027 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1028 err = -EINVAL;
1029 break;
1030 }
f57e7d5a 1031 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1032 t->parms.iph.saddr = p.iph.saddr;
1033 t->parms.iph.daddr = p.iph.daddr;
1034 t->parms.i_key = p.i_key;
1035 t->parms.o_key = p.o_key;
1036 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1037 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1038 ipgre_tunnel_link(ign, t);
1da177e4
LT
1039 netdev_state_change(dev);
1040 }
1041 }
1042
1043 if (t) {
1044 err = 0;
1045 if (cmd == SIOCCHGTUNNEL) {
1046 t->parms.iph.ttl = p.iph.ttl;
1047 t->parms.iph.tos = p.iph.tos;
1048 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1049 if (t->parms.link != p.link) {
1050 t->parms.link = p.link;
42aa9162 1051 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1052 netdev_state_change(dev);
1053 }
1da177e4
LT
1054 }
1055 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1056 err = -EFAULT;
1057 } else
1058 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1059 break;
1060
1061 case SIOCDELTUNNEL:
1062 err = -EPERM;
1063 if (!capable(CAP_NET_ADMIN))
1064 goto done;
1065
7daa0004 1066 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1067 err = -EFAULT;
1068 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1069 goto done;
1070 err = -ENOENT;
f57e7d5a 1071 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1072 goto done;
1073 err = -EPERM;
7daa0004 1074 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1075 goto done;
1076 dev = t->dev;
1077 }
22f8cde5
SH
1078 unregister_netdevice(dev);
1079 err = 0;
1da177e4
LT
1080 break;
1081
1082 default:
1083 err = -EINVAL;
1084 }
1085
1086done:
1087 return err;
1088}
1089
1da177e4
LT
1090static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1091{
2941a486 1092 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1093 if (new_mtu < 68 ||
1094 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1095 return -EINVAL;
1096 dev->mtu = new_mtu;
1097 return 0;
1098}
1099
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1128
3b04ddde
SH
1129static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1130 unsigned short type,
1131 const void *daddr, const void *saddr, unsigned len)
1da177e4 1132{
2941a486 1133 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1134 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1135 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1136
1137 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1138 p[0] = t->parms.o_flags;
1139 p[1] = htons(type);
1140
1141 /*
e905a9ed 1142 * Set the source hardware address.
1da177e4 1143 */
e905a9ed 1144
1da177e4
LT
1145 if (saddr)
1146 memcpy(&iph->saddr, saddr, 4);
6d55cb91 1147 if (daddr)
1da177e4 1148 memcpy(&iph->daddr, daddr, 4);
6d55cb91 1149 if (iph->daddr)
1da177e4 1150 return t->hlen;
e905a9ed 1151
1da177e4
LT
1152 return -t->hlen;
1153}
1154
6a5f44d7
TT
1155static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1156{
6ed2533e 1157 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1158 memcpy(haddr, &iph->saddr, 4);
1159 return 4;
1160}
1161
3b04ddde
SH
1162static const struct header_ops ipgre_header_ops = {
1163 .create = ipgre_header,
6a5f44d7 1164 .parse = ipgre_header_parse,
3b04ddde
SH
1165};
1166
6a5f44d7 1167#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1168static int ipgre_open(struct net_device *dev)
1169{
2941a486 1170 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1171
f97c1e0c 1172 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1173 struct flowi fl = { .oif = t->parms.link,
1174 .nl_u = { .ip4_u =
1175 { .daddr = t->parms.iph.daddr,
1176 .saddr = t->parms.iph.saddr,
1177 .tos = RT_TOS(t->parms.iph.tos) } },
1178 .proto = IPPROTO_GRE };
1179 struct rtable *rt;
96635522 1180 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1181 return -EADDRNOTAVAIL;
1182 dev = rt->u.dst.dev;
1183 ip_rt_put(rt);
e5ed6399 1184 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1185 return -EADDRNOTAVAIL;
1186 t->mlink = dev->ifindex;
e5ed6399 1187 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1188 }
1189 return 0;
1190}
1191
1192static int ipgre_close(struct net_device *dev)
1193{
2941a486 1194 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1195
f97c1e0c 1196 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1197 struct in_device *in_dev;
c346dca1 1198 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1199 if (in_dev) {
1200 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1201 in_dev_put(in_dev);
1202 }
1203 }
1204 return 0;
1205}
1206
1207#endif
1208
b8c26a33
SH
1209static const struct net_device_ops ipgre_netdev_ops = {
1210 .ndo_init = ipgre_tunnel_init,
1211 .ndo_uninit = ipgre_tunnel_uninit,
1212#ifdef CONFIG_NET_IPGRE_BROADCAST
1213 .ndo_open = ipgre_open,
1214 .ndo_stop = ipgre_close,
1215#endif
1216 .ndo_start_xmit = ipgre_tunnel_xmit,
1217 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1218 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1219};
1220
1da177e4
LT
1221static void ipgre_tunnel_setup(struct net_device *dev)
1222{
b8c26a33 1223 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1224 dev->destructor = free_netdev;
1da177e4
LT
1225
1226 dev->type = ARPHRD_IPGRE;
c95b819a 1227 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1228 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1229 dev->flags = IFF_NOARP;
1230 dev->iflink = 0;
1231 dev->addr_len = 4;
0b67eceb 1232 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1233 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1234}
1235
1236static int ipgre_tunnel_init(struct net_device *dev)
1237{
1da177e4
LT
1238 struct ip_tunnel *tunnel;
1239 struct iphdr *iph;
1da177e4 1240
2941a486 1241 tunnel = netdev_priv(dev);
1da177e4
LT
1242 iph = &tunnel->parms.iph;
1243
1244 tunnel->dev = dev;
1245 strcpy(tunnel->parms.name, dev->name);
1246
1247 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1248 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1249
1da177e4 1250 if (iph->daddr) {
1da177e4 1251#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1252 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1253 if (!iph->saddr)
1254 return -EINVAL;
1255 dev->flags = IFF_BROADCAST;
3b04ddde 1256 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1257 }
1258#endif
ee34c1eb 1259 } else
6a5f44d7 1260 dev->header_ops = &ipgre_header_ops;
1da177e4 1261
1da177e4
LT
1262 return 0;
1263}
1264
b8c26a33 1265static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1266{
2941a486 1267 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1268 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1269 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1270
1271 tunnel->dev = dev;
1272 strcpy(tunnel->parms.name, dev->name);
1273
1274 iph->version = 4;
1275 iph->protocol = IPPROTO_GRE;
1276 iph->ihl = 5;
1277 tunnel->hlen = sizeof(struct iphdr) + 4;
1278
1279 dev_hold(dev);
eb8ce741 1280 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1281}
1282
1283
32613090 1284static const struct net_protocol ipgre_protocol = {
1da177e4
LT
1285 .handler = ipgre_rcv,
1286 .err_handler = ipgre_err,
f96c148f 1287 .netns_ok = 1,
1da177e4
LT
1288};
1289
eef6dd65 1290static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1291{
1292 int prio;
1293
1294 for (prio = 0; prio < 4; prio++) {
1295 int h;
1296 for (h = 0; h < HASH_SIZE; h++) {
eef6dd65
ED
1297 struct ip_tunnel *t = ign->tunnels[prio][h];
1298
1299 while (t != NULL) {
1300 unregister_netdevice_queue(t->dev, head);
1301 t = t->next;
1302 }
eb8ce741
PE
1303 }
1304 }
1305}
1306
2c8c1e72 1307static int __net_init ipgre_init_net(struct net *net)
59a4c759 1308{
cfb8fbf2 1309 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
59a4c759 1310 int err;
59a4c759 1311
7daa0004
PE
1312 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1313 ipgre_tunnel_setup);
1314 if (!ign->fb_tunnel_dev) {
1315 err = -ENOMEM;
1316 goto err_alloc_dev;
1317 }
be77e593 1318 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1319
b8c26a33 1320 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1321 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1322
1323 if ((err = register_netdev(ign->fb_tunnel_dev)))
1324 goto err_reg_dev;
1325
59a4c759
PE
1326 return 0;
1327
7daa0004
PE
1328err_reg_dev:
1329 free_netdev(ign->fb_tunnel_dev);
1330err_alloc_dev:
59a4c759
PE
1331 return err;
1332}
1333
2c8c1e72 1334static void __net_exit ipgre_exit_net(struct net *net)
59a4c759
PE
1335{
1336 struct ipgre_net *ign;
eef6dd65 1337 LIST_HEAD(list);
59a4c759
PE
1338
1339 ign = net_generic(net, ipgre_net_id);
7daa0004 1340 rtnl_lock();
eef6dd65
ED
1341 ipgre_destroy_tunnels(ign, &list);
1342 unregister_netdevice_many(&list);
7daa0004 1343 rtnl_unlock();
59a4c759
PE
1344}
1345
1346static struct pernet_operations ipgre_net_ops = {
1347 .init = ipgre_init_net,
1348 .exit = ipgre_exit_net,
cfb8fbf2
EB
1349 .id = &ipgre_net_id,
1350 .size = sizeof(struct ipgre_net),
59a4c759 1351};
1da177e4 1352
c19e654d
HX
1353static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1354{
1355 __be16 flags;
1356
1357 if (!data)
1358 return 0;
1359
1360 flags = 0;
1361 if (data[IFLA_GRE_IFLAGS])
1362 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1363 if (data[IFLA_GRE_OFLAGS])
1364 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1365 if (flags & (GRE_VERSION|GRE_ROUTING))
1366 return -EINVAL;
1367
1368 return 0;
1369}
1370
e1a80002
HX
1371static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1372{
1373 __be32 daddr;
1374
1375 if (tb[IFLA_ADDRESS]) {
1376 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1377 return -EINVAL;
1378 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1379 return -EADDRNOTAVAIL;
1380 }
1381
1382 if (!data)
1383 goto out;
1384
1385 if (data[IFLA_GRE_REMOTE]) {
1386 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1387 if (!daddr)
1388 return -EINVAL;
1389 }
1390
1391out:
1392 return ipgre_tunnel_validate(tb, data);
1393}
1394
c19e654d
HX
1395static void ipgre_netlink_parms(struct nlattr *data[],
1396 struct ip_tunnel_parm *parms)
1397{
7bb82d92 1398 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1399
1400 parms->iph.protocol = IPPROTO_GRE;
1401
1402 if (!data)
1403 return;
1404
1405 if (data[IFLA_GRE_LINK])
1406 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1407
1408 if (data[IFLA_GRE_IFLAGS])
1409 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1410
1411 if (data[IFLA_GRE_OFLAGS])
1412 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1413
1414 if (data[IFLA_GRE_IKEY])
1415 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1416
1417 if (data[IFLA_GRE_OKEY])
1418 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1419
1420 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1421 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1422
1423 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1424 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1425
1426 if (data[IFLA_GRE_TTL])
1427 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1428
1429 if (data[IFLA_GRE_TOS])
1430 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1431
1432 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1433 parms->iph.frag_off = htons(IP_DF);
1434}
1435
e1a80002
HX
1436static int ipgre_tap_init(struct net_device *dev)
1437{
1438 struct ip_tunnel *tunnel;
1439
1440 tunnel = netdev_priv(dev);
1441
1442 tunnel->dev = dev;
1443 strcpy(tunnel->parms.name, dev->name);
1444
1445 ipgre_tunnel_bind_dev(dev);
1446
1447 return 0;
1448}
1449
b8c26a33
SH
1450static const struct net_device_ops ipgre_tap_netdev_ops = {
1451 .ndo_init = ipgre_tap_init,
1452 .ndo_uninit = ipgre_tunnel_uninit,
1453 .ndo_start_xmit = ipgre_tunnel_xmit,
1454 .ndo_set_mac_address = eth_mac_addr,
1455 .ndo_validate_addr = eth_validate_addr,
1456 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1457};
1458
e1a80002
HX
1459static void ipgre_tap_setup(struct net_device *dev)
1460{
1461
1462 ether_setup(dev);
1463
2e9526b3 1464 dev->netdev_ops = &ipgre_tap_netdev_ops;
e1a80002 1465 dev->destructor = free_netdev;
e1a80002
HX
1466
1467 dev->iflink = 0;
1468 dev->features |= NETIF_F_NETNS_LOCAL;
1469}
1470
81adee47 1471static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1472 struct nlattr *data[])
1473{
1474 struct ip_tunnel *nt;
1475 struct net *net = dev_net(dev);
1476 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1477 int mtu;
1478 int err;
1479
1480 nt = netdev_priv(dev);
1481 ipgre_netlink_parms(data, &nt->parms);
1482
e1a80002 1483 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1484 return -EEXIST;
1485
e1a80002
HX
1486 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1487 random_ether_addr(dev->dev_addr);
1488
c19e654d
HX
1489 mtu = ipgre_tunnel_bind_dev(dev);
1490 if (!tb[IFLA_MTU])
1491 dev->mtu = mtu;
1492
1493 err = register_netdevice(dev);
1494 if (err)
1495 goto out;
1496
1497 dev_hold(dev);
1498 ipgre_tunnel_link(ign, nt);
1499
1500out:
1501 return err;
1502}
1503
1504static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1505 struct nlattr *data[])
1506{
1507 struct ip_tunnel *t, *nt;
1508 struct net *net = dev_net(dev);
1509 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1510 struct ip_tunnel_parm p;
1511 int mtu;
1512
1513 if (dev == ign->fb_tunnel_dev)
1514 return -EINVAL;
1515
1516 nt = netdev_priv(dev);
1517 ipgre_netlink_parms(data, &p);
1518
1519 t = ipgre_tunnel_locate(net, &p, 0);
1520
1521 if (t) {
1522 if (t->dev != dev)
1523 return -EEXIST;
1524 } else {
c19e654d
HX
1525 t = nt;
1526
2e9526b3
HX
1527 if (dev->type != ARPHRD_ETHER) {
1528 unsigned nflags = 0;
c19e654d 1529
2e9526b3
HX
1530 if (ipv4_is_multicast(p.iph.daddr))
1531 nflags = IFF_BROADCAST;
1532 else if (p.iph.daddr)
1533 nflags = IFF_POINTOPOINT;
1534
1535 if ((dev->flags ^ nflags) &
1536 (IFF_POINTOPOINT | IFF_BROADCAST))
1537 return -EINVAL;
1538 }
c19e654d
HX
1539
1540 ipgre_tunnel_unlink(ign, t);
1541 t->parms.iph.saddr = p.iph.saddr;
1542 t->parms.iph.daddr = p.iph.daddr;
1543 t->parms.i_key = p.i_key;
2e9526b3
HX
1544 if (dev->type != ARPHRD_ETHER) {
1545 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1546 memcpy(dev->broadcast, &p.iph.daddr, 4);
1547 }
c19e654d
HX
1548 ipgre_tunnel_link(ign, t);
1549 netdev_state_change(dev);
1550 }
1551
1552 t->parms.o_key = p.o_key;
1553 t->parms.iph.ttl = p.iph.ttl;
1554 t->parms.iph.tos = p.iph.tos;
1555 t->parms.iph.frag_off = p.iph.frag_off;
1556
1557 if (t->parms.link != p.link) {
1558 t->parms.link = p.link;
1559 mtu = ipgre_tunnel_bind_dev(dev);
1560 if (!tb[IFLA_MTU])
1561 dev->mtu = mtu;
1562 netdev_state_change(dev);
1563 }
1564
1565 return 0;
1566}
1567
1568static size_t ipgre_get_size(const struct net_device *dev)
1569{
1570 return
1571 /* IFLA_GRE_LINK */
1572 nla_total_size(4) +
1573 /* IFLA_GRE_IFLAGS */
1574 nla_total_size(2) +
1575 /* IFLA_GRE_OFLAGS */
1576 nla_total_size(2) +
1577 /* IFLA_GRE_IKEY */
1578 nla_total_size(4) +
1579 /* IFLA_GRE_OKEY */
1580 nla_total_size(4) +
1581 /* IFLA_GRE_LOCAL */
1582 nla_total_size(4) +
1583 /* IFLA_GRE_REMOTE */
1584 nla_total_size(4) +
1585 /* IFLA_GRE_TTL */
1586 nla_total_size(1) +
1587 /* IFLA_GRE_TOS */
1588 nla_total_size(1) +
1589 /* IFLA_GRE_PMTUDISC */
1590 nla_total_size(1) +
1591 0;
1592}
1593
1594static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1595{
1596 struct ip_tunnel *t = netdev_priv(dev);
1597 struct ip_tunnel_parm *p = &t->parms;
1598
1599 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1600 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1601 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1602 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1603 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1604 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1605 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1606 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1607 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1608 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1609
1610 return 0;
1611
1612nla_put_failure:
1613 return -EMSGSIZE;
1614}
1615
1616static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1617 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1618 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1619 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1620 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1621 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1622 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1623 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1624 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1625 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1626 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1627};
1628
1629static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1630 .kind = "gre",
1631 .maxtype = IFLA_GRE_MAX,
1632 .policy = ipgre_policy,
1633 .priv_size = sizeof(struct ip_tunnel),
1634 .setup = ipgre_tunnel_setup,
1635 .validate = ipgre_tunnel_validate,
1636 .newlink = ipgre_newlink,
1637 .changelink = ipgre_changelink,
1638 .get_size = ipgre_get_size,
1639 .fill_info = ipgre_fill_info,
1640};
1641
e1a80002
HX
1642static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1643 .kind = "gretap",
1644 .maxtype = IFLA_GRE_MAX,
1645 .policy = ipgre_policy,
1646 .priv_size = sizeof(struct ip_tunnel),
1647 .setup = ipgre_tap_setup,
1648 .validate = ipgre_tap_validate,
1649 .newlink = ipgre_newlink,
1650 .changelink = ipgre_changelink,
1651 .get_size = ipgre_get_size,
1652 .fill_info = ipgre_fill_info,
1653};
1654
1da177e4
LT
1655/*
1656 * And now the modules code and kernel interface.
1657 */
1658
1659static int __init ipgre_init(void)
1660{
1661 int err;
1662
1663 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1664
cfb8fbf2 1665 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1666 if (err < 0)
c2892f02
AD
1667 return err;
1668
1669 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1670 if (err < 0) {
1671 printk(KERN_INFO "ipgre init: can't add protocol\n");
1672 goto add_proto_failed;
1673 }
7daa0004 1674
c19e654d
HX
1675 err = rtnl_link_register(&ipgre_link_ops);
1676 if (err < 0)
1677 goto rtnl_link_failed;
1678
e1a80002
HX
1679 err = rtnl_link_register(&ipgre_tap_ops);
1680 if (err < 0)
1681 goto tap_ops_failed;
1682
c19e654d 1683out:
1da177e4 1684 return err;
c19e654d 1685
e1a80002
HX
1686tap_ops_failed:
1687 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1688rtnl_link_failed:
c19e654d 1689 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
c2892f02
AD
1690add_proto_failed:
1691 unregister_pernet_device(&ipgre_net_ops);
c19e654d 1692 goto out;
1da177e4
LT
1693}
1694
db44575f 1695static void __exit ipgre_fini(void)
1da177e4 1696{
e1a80002 1697 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1698 rtnl_link_unregister(&ipgre_link_ops);
1da177e4
LT
1699 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1700 printk(KERN_INFO "ipgre close: can't remove protocol\n");
c2892f02 1701 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1702}
1703
1704module_init(ipgre_init);
1705module_exit(ipgre_fini);
1706MODULE_LICENSE("GPL");
4d74f8ba
PM
1707MODULE_ALIAS_RTNL_LINK("gre");
1708MODULE_ALIAS_RTNL_LINK("gretap");