]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
iwlwifi: don't include iwl-dev.h from iwl-devtrace.h
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
a43912ab 69 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
70
71
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would be even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all.
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
107
108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
115
116 Alexey Kuznetsov.
117 */
118
c19e654d 119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 122static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
123
124/* Fallback tunnel: no source, no destination, no key, no options */
125
eb8ce741
PE
126#define HASH_SIZE 16
127
f99189b1 128static int ipgre_net_id __read_mostly;
/* GRE state fetched with net_generic(net, ipgre_net_id) by the functions below. */
59a4c759 129struct ipgre_net {
eb8ce741
PE
/* Four hash tables of HASH_SIZE buckets; the first index is the match priority (0..3). */
130 struct ip_tunnel *tunnels[4][HASH_SIZE];
131
/* Fallback tunnel device, returned by lookup when no configured tunnel matches. */
7daa0004 132 struct net_device *fb_tunnel_dev;
59a4c759
PE
133};
134
1da177e4
LT
135/* Tunnel hash table */
136
137/*
138 4 hash tables:
139
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
144
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
148
149 All keyless packets, if not matched by configured keyless tunnels
150 will match fallback tunnel.
151 */
152
d5a0a1e3 153#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
1da177e4 154
eb8ce741
PE
155#define tunnels_r_l tunnels[3]
156#define tunnels_r tunnels[2]
157#define tunnels_l tunnels[1]
158#define tunnels_wc tunnels[0]
8d5b2c08
ED
159/*
160 * Locking : hash tables are protected by RCU and a spinlock
161 */
162static DEFINE_SPINLOCK(ipgre_lock);
1da177e4 163
8d5b2c08
ED
164#define for_each_ip_tunnel_rcu(start) \
165 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1da177e4
LT
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
749c10f9 169static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
1da177e4 172{
749c10f9
TT
173 struct net *net = dev_net(dev);
174 int link = dev->ifindex;
1da177e4
LT
175 unsigned h0 = HASH(remote);
176 unsigned h1 = HASH(key);
afcf1242 177 struct ip_tunnel *t, *cand = NULL;
7daa0004 178 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
179 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
180 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 181 int score, cand_score = 4;
1da177e4 182
8d5b2c08 183 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
184 if (local != t->parms.iph.saddr ||
185 remote != t->parms.iph.daddr ||
186 key != t->parms.i_key ||
187 !(t->dev->flags & IFF_UP))
188 continue;
189
190 if (t->dev->type != ARPHRD_IPGRE &&
191 t->dev->type != dev_type)
192 continue;
193
afcf1242 194 score = 0;
749c10f9 195 if (t->parms.link != link)
afcf1242 196 score |= 1;
749c10f9 197 if (t->dev->type != dev_type)
afcf1242
TT
198 score |= 2;
199 if (score == 0)
749c10f9 200 return t;
afcf1242
TT
201
202 if (score < cand_score) {
203 cand = t;
204 cand_score = score;
205 }
1da177e4 206 }
e1a80002 207
8d5b2c08 208 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9
TT
209 if (remote != t->parms.iph.daddr ||
210 key != t->parms.i_key ||
211 !(t->dev->flags & IFF_UP))
212 continue;
213
214 if (t->dev->type != ARPHRD_IPGRE &&
215 t->dev->type != dev_type)
216 continue;
217
afcf1242 218 score = 0;
749c10f9 219 if (t->parms.link != link)
afcf1242 220 score |= 1;
749c10f9 221 if (t->dev->type != dev_type)
afcf1242
TT
222 score |= 2;
223 if (score == 0)
749c10f9 224 return t;
afcf1242
TT
225
226 if (score < cand_score) {
227 cand = t;
228 cand_score = score;
229 }
1da177e4 230 }
e1a80002 231
8d5b2c08 232 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
233 if ((local != t->parms.iph.saddr &&
234 (local != t->parms.iph.daddr ||
235 !ipv4_is_multicast(local))) ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
238 continue;
239
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
242 continue;
243
afcf1242 244 score = 0;
749c10f9 245 if (t->parms.link != link)
afcf1242 246 score |= 1;
749c10f9 247 if (t->dev->type != dev_type)
afcf1242
TT
248 score |= 2;
249 if (score == 0)
749c10f9 250 return t;
afcf1242
TT
251
252 if (score < cand_score) {
253 cand = t;
254 cand_score = score;
255 }
1da177e4 256 }
e1a80002 257
8d5b2c08 258 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
259 if (t->parms.i_key != key ||
260 !(t->dev->flags & IFF_UP))
261 continue;
262
263 if (t->dev->type != ARPHRD_IPGRE &&
264 t->dev->type != dev_type)
265 continue;
266
afcf1242 267 score = 0;
749c10f9 268 if (t->parms.link != link)
afcf1242 269 score |= 1;
749c10f9 270 if (t->dev->type != dev_type)
afcf1242
TT
271 score |= 2;
272 if (score == 0)
749c10f9 273 return t;
afcf1242
TT
274
275 if (score < cand_score) {
276 cand = t;
277 cand_score = score;
278 }
1da177e4
LT
279 }
280
afcf1242
TT
281 if (cand != NULL)
282 return cand;
e1a80002 283
8d5b2c08
ED
284 dev = ign->fb_tunnel_dev;
285 if (dev->flags & IFF_UP)
286 return netdev_priv(dev);
749c10f9 287
1da177e4
LT
288 return NULL;
289}
290
f57e7d5a
PE
291static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
292 struct ip_tunnel_parm *parms)
1da177e4 293{
5056a1ef
YH
294 __be32 remote = parms->iph.daddr;
295 __be32 local = parms->iph.saddr;
296 __be32 key = parms->i_key;
1da177e4
LT
297 unsigned h = HASH(key);
298 int prio = 0;
299
300 if (local)
301 prio |= 1;
f97c1e0c 302 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
303 prio |= 2;
304 h ^= HASH(remote);
305 }
306
eb8ce741 307 return &ign->tunnels[prio][h];
1da177e4
LT
308}
309
f57e7d5a
PE
310static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
311 struct ip_tunnel *t)
5056a1ef 312{
f57e7d5a 313 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
314}
315
f57e7d5a 316static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 317{
f57e7d5a 318 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4 319
8d5b2c08 320 spin_lock_bh(&ipgre_lock);
1da177e4 321 t->next = *tp;
8d5b2c08
ED
322 rcu_assign_pointer(*tp, t);
323 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
324}
325
f57e7d5a 326static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
327{
328 struct ip_tunnel **tp;
329
f57e7d5a 330 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4 331 if (t == *tp) {
8d5b2c08 332 spin_lock_bh(&ipgre_lock);
1da177e4 333 *tp = t->next;
8d5b2c08 334 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
335 break;
336 }
337 }
338}
339
e1a80002
HX
340static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341 struct ip_tunnel_parm *parms,
342 int type)
1da177e4 343{
d5a0a1e3
AV
344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
749c10f9 347 int link = parms->link;
e1a80002
HX
348 struct ip_tunnel *t, **tp;
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350
351 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
352 if (local == t->parms.iph.saddr &&
353 remote == t->parms.iph.daddr &&
354 key == t->parms.i_key &&
749c10f9 355 link == t->parms.link &&
e1a80002
HX
356 type == t->dev->type)
357 break;
358
359 return t;
360}
361
362static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
363 struct ip_tunnel_parm *parms, int create)
364{
365 struct ip_tunnel *t, *nt;
1da177e4 366 struct net_device *dev;
1da177e4 367 char name[IFNAMSIZ];
f57e7d5a 368 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 369
e1a80002
HX
370 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
371 if (t || !create)
372 return t;
1da177e4
LT
373
374 if (parms->name[0])
375 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
376 else
377 sprintf(name, "gre%%d");
1da177e4
LT
378
379 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
380 if (!dev)
381 return NULL;
382
0b67eceb
PE
383 dev_net_set(dev, net);
384
b37d428b
PE
385 if (strchr(name, '%')) {
386 if (dev_alloc_name(dev, name) < 0)
387 goto failed_free;
388 }
389
2941a486 390 nt = netdev_priv(dev);
1da177e4 391 nt->parms = *parms;
c19e654d 392 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 393
42aa9162
HX
394 dev->mtu = ipgre_tunnel_bind_dev(dev);
395
b37d428b
PE
396 if (register_netdevice(dev) < 0)
397 goto failed_free;
1da177e4 398
1da177e4 399 dev_hold(dev);
f57e7d5a 400 ipgre_tunnel_link(ign, nt);
1da177e4
LT
401 return nt;
402
b37d428b
PE
403failed_free:
404 free_netdev(dev);
1da177e4
LT
405 return NULL;
406}
407
408static void ipgre_tunnel_uninit(struct net_device *dev)
409{
f57e7d5a
PE
410 struct net *net = dev_net(dev);
411 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
412
413 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
414 dev_put(dev);
415}
416
417
418static void ipgre_err(struct sk_buff *skb, u32 info)
419{
1da177e4 420
071f92d0 421/* All the routers (except for Linux) return only
1da177e4
LT
422 8 bytes of packet payload. It means, that precise relaying of
423 ICMP in the real Internet is absolutely infeasible.
424
425 Moreover, Cisco "wise men" put GRE key to the third word
426 in GRE header. It makes impossible maintaining even soft state for keyed
427 GRE tunnels with enabled checksum. Tell them "thank you".
428
429 Well, I wonder, rfc1812 was written by Cisco employee,
430 what the hell these idiots break standrads established
431 by themself???
432 */
433
6ed2533e 434 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 435 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 436 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
437 const int type = icmp_hdr(skb)->type;
438 const int code = icmp_hdr(skb)->code;
1da177e4 439 struct ip_tunnel *t;
d5a0a1e3 440 __be16 flags;
1da177e4
LT
441
442 flags = p[0];
443 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
444 if (flags&(GRE_VERSION|GRE_ROUTING))
445 return;
446 if (flags&GRE_KEY) {
447 grehlen += 4;
448 if (flags&GRE_CSUM)
449 grehlen += 4;
450 }
451 }
452
453 /* If only 8 bytes returned, keyed message will be dropped here */
454 if (skb_headlen(skb) < grehlen)
455 return;
456
457 switch (type) {
458 default:
459 case ICMP_PARAMETERPROB:
460 return;
461
462 case ICMP_DEST_UNREACH:
463 switch (code) {
464 case ICMP_SR_FAILED:
465 case ICMP_PORT_UNREACH:
466 /* Impossible event. */
467 return;
468 case ICMP_FRAG_NEEDED:
469 /* Soft state for pmtu is maintained by IP core. */
470 return;
471 default:
472 /* All others are translated to HOST_UNREACH.
473 rfc2003 contains "deep thoughts" about NET_UNREACH,
474 I believe they are just ether pollution. --ANK
475 */
476 break;
477 }
478 break;
479 case ICMP_TIME_EXCEEDED:
480 if (code != ICMP_EXC_TTL)
481 return;
482 break;
483 }
484
8d5b2c08 485 rcu_read_lock();
749c10f9 486 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
487 flags & GRE_KEY ?
488 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
489 p[1]);
f97c1e0c
JP
490 if (t == NULL || t->parms.iph.daddr == 0 ||
491 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
492 goto out;
493
494 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
495 goto out;
496
da6185d8 497 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
498 t->err_count++;
499 else
500 t->err_count = 1;
501 t->err_time = jiffies;
502out:
8d5b2c08 503 rcu_read_unlock();
1da177e4 504 return;
1da177e4
LT
505}
506
507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
508{
509 if (INET_ECN_is_ce(iph->tos)) {
510 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 511 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 512 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 513 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
514 }
515 }
516}
517
518static inline u8
519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
520{
521 u8 inner = 0;
522 if (skb->protocol == htons(ETH_P_IP))
523 inner = old_iph->tos;
524 else if (skb->protocol == htons(ETH_P_IPV6))
525 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
526 return INET_ECN_encapsulate(tos, inner);
527}
528
529static int ipgre_rcv(struct sk_buff *skb)
530{
531 struct iphdr *iph;
532 u8 *h;
d5a0a1e3 533 __be16 flags;
d3bc23e7 534 __sum16 csum = 0;
d5a0a1e3 535 __be32 key = 0;
1da177e4
LT
536 u32 seqno = 0;
537 struct ip_tunnel *tunnel;
538 int offset = 4;
e1a80002 539 __be16 gre_proto;
64194c31 540 unsigned int len;
1da177e4
LT
541
542 if (!pskb_may_pull(skb, 16))
543 goto drop_nolock;
544
eddc9ec5 545 iph = ip_hdr(skb);
1da177e4 546 h = skb->data;
d5a0a1e3 547 flags = *(__be16*)h;
1da177e4
LT
548
549 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
550 /* - Version must be 0.
551 - We do not support routing headers.
552 */
553 if (flags&(GRE_VERSION|GRE_ROUTING))
554 goto drop_nolock;
555
556 if (flags&GRE_CSUM) {
fb286bb2 557 switch (skb->ip_summed) {
84fa7933 558 case CHECKSUM_COMPLETE:
d3bc23e7 559 csum = csum_fold(skb->csum);
fb286bb2
HX
560 if (!csum)
561 break;
562 /* fall through */
563 case CHECKSUM_NONE:
564 skb->csum = 0;
565 csum = __skb_checksum_complete(skb);
84fa7933 566 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
567 }
568 offset += 4;
569 }
570 if (flags&GRE_KEY) {
d5a0a1e3 571 key = *(__be32*)(h + offset);
1da177e4
LT
572 offset += 4;
573 }
574 if (flags&GRE_SEQ) {
d5a0a1e3 575 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
576 offset += 4;
577 }
578 }
579
e1a80002
HX
580 gre_proto = *(__be16 *)(h + 2);
581
8d5b2c08 582 rcu_read_lock();
749c10f9 583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
584 iph->saddr, iph->daddr, key,
585 gre_proto))) {
addd68eb
PE
586 struct net_device_stats *stats = &tunnel->dev->stats;
587
1da177e4
LT
588 secpath_reset(skb);
589
e1a80002 590 skb->protocol = gre_proto;
1da177e4
LT
591 /* WCCP version 1 and 2 protocol decoding.
592 * - Change protocol to IP
593 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
594 */
e1a80002 595 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 596 skb->protocol = htons(ETH_P_IP);
e905a9ed 597 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
598 offset += 4;
599 }
600
1d069167 601 skb->mac_header = skb->network_header;
4209fb60 602 __pskb_pull(skb, offset);
9c70220b 603 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
604 skb->pkt_type = PACKET_HOST;
605#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 606 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 607 /* Looped back packet, drop it! */
511c3f92 608 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 609 goto drop;
addd68eb 610 stats->multicast++;
1da177e4
LT
611 skb->pkt_type = PACKET_BROADCAST;
612 }
613#endif
614
615 if (((flags&GRE_CSUM) && csum) ||
616 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
617 stats->rx_crc_errors++;
618 stats->rx_errors++;
1da177e4
LT
619 goto drop;
620 }
621 if (tunnel->parms.i_flags&GRE_SEQ) {
622 if (!(flags&GRE_SEQ) ||
623 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
624 stats->rx_fifo_errors++;
625 stats->rx_errors++;
1da177e4
LT
626 goto drop;
627 }
628 tunnel->i_seqno = seqno + 1;
629 }
e1a80002 630
64194c31
HX
631 len = skb->len;
632
e1a80002
HX
633 /* Warning: All skb pointers will be invalidated! */
634 if (tunnel->dev->type == ARPHRD_ETHER) {
635 if (!pskb_may_pull(skb, ETH_HLEN)) {
636 stats->rx_length_errors++;
637 stats->rx_errors++;
638 goto drop;
639 }
640
641 iph = ip_hdr(skb);
642 skb->protocol = eth_type_trans(skb, tunnel->dev);
643 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
644 }
645
addd68eb 646 stats->rx_packets++;
64194c31 647 stats->rx_bytes += len;
1da177e4 648 skb->dev = tunnel->dev;
adf30907 649 skb_dst_drop(skb);
1da177e4 650 nf_reset(skb);
e1a80002
HX
651
652 skb_reset_network_header(skb);
1da177e4 653 ipgre_ecn_decapsulate(iph, skb);
e1a80002 654
1da177e4 655 netif_rx(skb);
8d5b2c08 656 rcu_read_unlock();
1da177e4
LT
657 return(0);
658 }
45af08be 659 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
660
661drop:
8d5b2c08 662 rcu_read_unlock();
1da177e4
LT
663drop_nolock:
664 kfree_skb(skb);
665 return(0);
666}
667
6fef4c0c 668static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 669{
2941a486 670 struct ip_tunnel *tunnel = netdev_priv(dev);
0bfbedb1
ED
671 struct net_device_stats *stats = &dev->stats;
672 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
eddc9ec5 673 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
674 struct iphdr *tiph;
675 u8 tos;
d5a0a1e3 676 __be16 df;
1da177e4
LT
677 struct rtable *rt; /* Route to the other host */
678 struct net_device *tdev; /* Device to other host */
679 struct iphdr *iph; /* Our new IP header */
c2636b4d 680 unsigned int max_headroom; /* The extra header space needed */
1da177e4 681 int gre_hlen;
d5a0a1e3 682 __be32 dst;
1da177e4
LT
683 int mtu;
684
e1a80002
HX
685 if (dev->type == ARPHRD_ETHER)
686 IPCB(skb)->flags = 0;
687
688 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 689 gre_hlen = 0;
6ed2533e 690 tiph = (struct iphdr *)skb->data;
1da177e4
LT
691 } else {
692 gre_hlen = tunnel->hlen;
693 tiph = &tunnel->parms.iph;
694 }
695
696 if ((dst = tiph->daddr) == 0) {
697 /* NBMA tunnel */
698
adf30907 699 if (skb_dst(skb) == NULL) {
addd68eb 700 stats->tx_fifo_errors++;
1da177e4
LT
701 goto tx_error;
702 }
703
704 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 705 rt = skb_rtable(skb);
1da177e4
LT
706 if ((dst = rt->rt_gateway) == 0)
707 goto tx_error_icmp;
708 }
709#ifdef CONFIG_IPV6
710 else if (skb->protocol == htons(ETH_P_IPV6)) {
711 struct in6_addr *addr6;
712 int addr_type;
adf30907 713 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
714
715 if (neigh == NULL)
716 goto tx_error;
717
6ed2533e 718 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
719 addr_type = ipv6_addr_type(addr6);
720
721 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 722 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
723 addr_type = ipv6_addr_type(addr6);
724 }
725
726 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
727 goto tx_error_icmp;
728
729 dst = addr6->s6_addr32[3];
730 }
731#endif
732 else
733 goto tx_error;
734 }
735
736 tos = tiph->tos;
ee686ca9
AJ
737 if (tos == 1) {
738 tos = 0;
1da177e4
LT
739 if (skb->protocol == htons(ETH_P_IP))
740 tos = old_iph->tos;
1da177e4
LT
741 }
742
743 {
744 struct flowi fl = { .oif = tunnel->parms.link,
745 .nl_u = { .ip4_u =
746 { .daddr = dst,
747 .saddr = tiph->saddr,
748 .tos = RT_TOS(tos) } },
749 .proto = IPPROTO_GRE };
96635522 750 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 751 stats->tx_carrier_errors++;
1da177e4
LT
752 goto tx_error;
753 }
754 }
755 tdev = rt->u.dst.dev;
756
757 if (tdev == dev) {
758 ip_rt_put(rt);
addd68eb 759 stats->collisions++;
1da177e4
LT
760 goto tx_error;
761 }
762
763 df = tiph->frag_off;
764 if (df)
c95b819a 765 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 766 else
adf30907 767 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 768
adf30907
ED
769 if (skb_dst(skb))
770 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
771
772 if (skb->protocol == htons(ETH_P_IP)) {
773 df |= (old_iph->frag_off&htons(IP_DF));
774
775 if ((old_iph->frag_off&htons(IP_DF)) &&
776 mtu < ntohs(old_iph->tot_len)) {
777 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
778 ip_rt_put(rt);
779 goto tx_error;
780 }
781 }
782#ifdef CONFIG_IPV6
783 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 784 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 785
adf30907 786 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
787 if ((tunnel->parms.iph.daddr &&
788 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
789 rt6->rt6i_dst.plen == 128) {
790 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 791 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
792 }
793 }
794
795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
3ffe533c 796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1da177e4
LT
797 ip_rt_put(rt);
798 goto tx_error;
799 }
800 }
801#endif
802
803 if (tunnel->err_count > 0) {
da6185d8
WY
804 if (time_before(jiffies,
805 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
806 tunnel->err_count--;
807
808 dst_link_failure(skb);
809 } else
810 tunnel->err_count = 0;
811 }
812
243aad83 813 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
1da177e4 814
cfbba49d
PM
815 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4 817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
243aad83
TT
818 if (max_headroom > dev->needed_headroom)
819 dev->needed_headroom = max_headroom;
1da177e4
LT
820 if (!new_skb) {
821 ip_rt_put(rt);
0bfbedb1 822 txq->tx_dropped++;
1da177e4 823 dev_kfree_skb(skb);
6ed10654 824 return NETDEV_TX_OK;
1da177e4
LT
825 }
826 if (skb->sk)
827 skb_set_owner_w(new_skb, skb->sk);
828 dev_kfree_skb(skb);
829 skb = new_skb;
eddc9ec5 830 old_iph = ip_hdr(skb);
1da177e4
LT
831 }
832
64194c31 833 skb_reset_transport_header(skb);
e2d1bca7
ACM
834 skb_push(skb, gre_hlen);
835 skb_reset_network_header(skb);
1da177e4 836 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
837 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
838 IPSKB_REROUTED);
adf30907
ED
839 skb_dst_drop(skb);
840 skb_dst_set(skb, &rt->u.dst);
1da177e4
LT
841
842 /*
843 * Push down and install the IPIP header.
844 */
845
eddc9ec5 846 iph = ip_hdr(skb);
1da177e4
LT
847 iph->version = 4;
848 iph->ihl = sizeof(struct iphdr) >> 2;
849 iph->frag_off = df;
850 iph->protocol = IPPROTO_GRE;
851 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
852 iph->daddr = rt->rt_dst;
853 iph->saddr = rt->rt_src;
854
855 if ((iph->ttl = tiph->ttl) == 0) {
856 if (skb->protocol == htons(ETH_P_IP))
857 iph->ttl = old_iph->ttl;
858#ifdef CONFIG_IPV6
859 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 860 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
861#endif
862 else
863 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
864 }
865
e1a80002
HX
866 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
867 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
868 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
869
870 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 871 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
872
873 if (tunnel->parms.o_flags&GRE_SEQ) {
874 ++tunnel->o_seqno;
875 *ptr = htonl(tunnel->o_seqno);
876 ptr--;
877 }
878 if (tunnel->parms.o_flags&GRE_KEY) {
879 *ptr = tunnel->parms.o_key;
880 ptr--;
881 }
882 if (tunnel->parms.o_flags&GRE_CSUM) {
883 *ptr = 0;
5f92a738 884 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
885 }
886 }
887
888 nf_reset(skb);
889
890 IPTUNNEL_XMIT();
6ed10654 891 return NETDEV_TX_OK;
1da177e4
LT
892
893tx_error_icmp:
894 dst_link_failure(skb);
895
896tx_error:
897 stats->tx_errors++;
898 dev_kfree_skb(skb);
6ed10654 899 return NETDEV_TX_OK;
1da177e4
LT
900}
901
42aa9162 902static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
903{
904 struct net_device *tdev = NULL;
905 struct ip_tunnel *tunnel;
906 struct iphdr *iph;
907 int hlen = LL_MAX_HEADER;
908 int mtu = ETH_DATA_LEN;
909 int addend = sizeof(struct iphdr) + 4;
910
911 tunnel = netdev_priv(dev);
912 iph = &tunnel->parms.iph;
913
c95b819a 914 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
915
916 if (iph->daddr) {
917 struct flowi fl = { .oif = tunnel->parms.link,
918 .nl_u = { .ip4_u =
919 { .daddr = iph->daddr,
920 .saddr = iph->saddr,
921 .tos = RT_TOS(iph->tos) } },
922 .proto = IPPROTO_GRE };
923 struct rtable *rt;
96635522 924 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
925 tdev = rt->u.dst.dev;
926 ip_rt_put(rt);
927 }
e1a80002
HX
928
929 if (dev->type != ARPHRD_ETHER)
930 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
931 }
932
933 if (!tdev && tunnel->parms.link)
96635522 934 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
935
936 if (tdev) {
c95b819a 937 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
938 mtu = tdev->mtu;
939 }
940 dev->iflink = tunnel->parms.link;
941
942 /* Precalculate GRE options length */
943 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
944 if (tunnel->parms.o_flags&GRE_CSUM)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_KEY)
947 addend += 4;
948 if (tunnel->parms.o_flags&GRE_SEQ)
949 addend += 4;
950 }
c95b819a 951 dev->needed_headroom = addend + hlen;
8cdb0456 952 mtu -= dev->hard_header_len + addend;
42aa9162
HX
953
954 if (mtu < 68)
955 mtu = 68;
956
ee34c1eb
MS
957 tunnel->hlen = addend;
958
42aa9162 959 return mtu;
ee34c1eb
MS
960}
961
1da177e4
LT
962static int
963ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
964{
965 int err = 0;
966 struct ip_tunnel_parm p;
967 struct ip_tunnel *t;
f57e7d5a
PE
968 struct net *net = dev_net(dev);
969 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
970
971 switch (cmd) {
972 case SIOCGETTUNNEL:
973 t = NULL;
7daa0004 974 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
975 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
976 err = -EFAULT;
977 break;
978 }
f57e7d5a 979 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
980 }
981 if (t == NULL)
2941a486 982 t = netdev_priv(dev);
1da177e4
LT
983 memcpy(&p, &t->parms, sizeof(p));
984 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
985 err = -EFAULT;
986 break;
987
988 case SIOCADDTUNNEL:
989 case SIOCCHGTUNNEL:
990 err = -EPERM;
991 if (!capable(CAP_NET_ADMIN))
992 goto done;
993
994 err = -EFAULT;
995 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
996 goto done;
997
998 err = -EINVAL;
999 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1000 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1001 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1002 goto done;
1003 if (p.iph.ttl)
1004 p.iph.frag_off |= htons(IP_DF);
1005
1006 if (!(p.i_flags&GRE_KEY))
1007 p.i_key = 0;
1008 if (!(p.o_flags&GRE_KEY))
1009 p.o_key = 0;
1010
f57e7d5a 1011 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1012
7daa0004 1013 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1014 if (t != NULL) {
1015 if (t->dev != dev) {
1016 err = -EEXIST;
1017 break;
1018 }
1019 } else {
6ed2533e 1020 unsigned nflags = 0;
1da177e4 1021
2941a486 1022 t = netdev_priv(dev);
1da177e4 1023
f97c1e0c 1024 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1025 nflags = IFF_BROADCAST;
1026 else if (p.iph.daddr)
1027 nflags = IFF_POINTOPOINT;
1028
1029 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1030 err = -EINVAL;
1031 break;
1032 }
f57e7d5a 1033 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1034 t->parms.iph.saddr = p.iph.saddr;
1035 t->parms.iph.daddr = p.iph.daddr;
1036 t->parms.i_key = p.i_key;
1037 t->parms.o_key = p.o_key;
1038 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1039 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1040 ipgre_tunnel_link(ign, t);
1da177e4
LT
1041 netdev_state_change(dev);
1042 }
1043 }
1044
1045 if (t) {
1046 err = 0;
1047 if (cmd == SIOCCHGTUNNEL) {
1048 t->parms.iph.ttl = p.iph.ttl;
1049 t->parms.iph.tos = p.iph.tos;
1050 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1051 if (t->parms.link != p.link) {
1052 t->parms.link = p.link;
42aa9162 1053 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1054 netdev_state_change(dev);
1055 }
1da177e4
LT
1056 }
1057 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1058 err = -EFAULT;
1059 } else
1060 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1061 break;
1062
1063 case SIOCDELTUNNEL:
1064 err = -EPERM;
1065 if (!capable(CAP_NET_ADMIN))
1066 goto done;
1067
7daa0004 1068 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1069 err = -EFAULT;
1070 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1071 goto done;
1072 err = -ENOENT;
f57e7d5a 1073 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1074 goto done;
1075 err = -EPERM;
7daa0004 1076 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1077 goto done;
1078 dev = t->dev;
1079 }
22f8cde5
SH
1080 unregister_netdevice(dev);
1081 err = 0;
1da177e4
LT
1082 break;
1083
1084 default:
1085 err = -EINVAL;
1086 }
1087
1088done:
1089 return err;
1090}
1091
1da177e4
LT
1092static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1093{
2941a486 1094 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1095 if (new_mtu < 68 ||
1096 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1097 return -EINVAL;
1098 dev->mtu = new_mtu;
1099 return 0;
1100}
1101
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1130
3b04ddde
SH
1131static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1132 unsigned short type,
1133 const void *daddr, const void *saddr, unsigned len)
1da177e4 1134{
2941a486 1135 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1136 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1137 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1138
1139 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1140 p[0] = t->parms.o_flags;
1141 p[1] = htons(type);
1142
1143 /*
e905a9ed 1144 * Set the source hardware address.
1da177e4 1145 */
e905a9ed 1146
1da177e4
LT
1147 if (saddr)
1148 memcpy(&iph->saddr, saddr, 4);
6d55cb91 1149 if (daddr)
1da177e4 1150 memcpy(&iph->daddr, daddr, 4);
6d55cb91 1151 if (iph->daddr)
1da177e4 1152 return t->hlen;
e905a9ed 1153
1da177e4
LT
1154 return -t->hlen;
1155}
1156
6a5f44d7
TT
1157static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1158{
6ed2533e 1159 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1160 memcpy(haddr, &iph->saddr, 4);
1161 return 4;
1162}
1163
3b04ddde
SH
1164static const struct header_ops ipgre_header_ops = {
1165 .create = ipgre_header,
6a5f44d7 1166 .parse = ipgre_header_parse,
3b04ddde
SH
1167};
1168
6a5f44d7 1169#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1170static int ipgre_open(struct net_device *dev)
1171{
2941a486 1172 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1173
f97c1e0c 1174 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1175 struct flowi fl = { .oif = t->parms.link,
1176 .nl_u = { .ip4_u =
1177 { .daddr = t->parms.iph.daddr,
1178 .saddr = t->parms.iph.saddr,
1179 .tos = RT_TOS(t->parms.iph.tos) } },
1180 .proto = IPPROTO_GRE };
1181 struct rtable *rt;
96635522 1182 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1183 return -EADDRNOTAVAIL;
1184 dev = rt->u.dst.dev;
1185 ip_rt_put(rt);
e5ed6399 1186 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1187 return -EADDRNOTAVAIL;
1188 t->mlink = dev->ifindex;
e5ed6399 1189 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1190 }
1191 return 0;
1192}
1193
1194static int ipgre_close(struct net_device *dev)
1195{
2941a486 1196 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1197
f97c1e0c 1198 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1199 struct in_device *in_dev;
c346dca1 1200 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1201 if (in_dev) {
1202 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1203 in_dev_put(in_dev);
1204 }
1205 }
1206 return 0;
1207}
1208
1209#endif
1210
b8c26a33
SH
1211static const struct net_device_ops ipgre_netdev_ops = {
1212 .ndo_init = ipgre_tunnel_init,
1213 .ndo_uninit = ipgre_tunnel_uninit,
1214#ifdef CONFIG_NET_IPGRE_BROADCAST
1215 .ndo_open = ipgre_open,
1216 .ndo_stop = ipgre_close,
1217#endif
1218 .ndo_start_xmit = ipgre_tunnel_xmit,
1219 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1220 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1221};
1222
1da177e4
LT
1223static void ipgre_tunnel_setup(struct net_device *dev)
1224{
b8c26a33 1225 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1226 dev->destructor = free_netdev;
1da177e4
LT
1227
1228 dev->type = ARPHRD_IPGRE;
c95b819a 1229 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1230 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1231 dev->flags = IFF_NOARP;
1232 dev->iflink = 0;
1233 dev->addr_len = 4;
0b67eceb 1234 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1235 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1236}
1237
1238static int ipgre_tunnel_init(struct net_device *dev)
1239{
1da177e4
LT
1240 struct ip_tunnel *tunnel;
1241 struct iphdr *iph;
1da177e4 1242
2941a486 1243 tunnel = netdev_priv(dev);
1da177e4
LT
1244 iph = &tunnel->parms.iph;
1245
1246 tunnel->dev = dev;
1247 strcpy(tunnel->parms.name, dev->name);
1248
1249 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1250 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1251
1da177e4 1252 if (iph->daddr) {
1da177e4 1253#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1254 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1255 if (!iph->saddr)
1256 return -EINVAL;
1257 dev->flags = IFF_BROADCAST;
3b04ddde 1258 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1259 }
1260#endif
ee34c1eb 1261 } else
6a5f44d7 1262 dev->header_ops = &ipgre_header_ops;
1da177e4 1263
1da177e4
LT
1264 return 0;
1265}
1266
b8c26a33 1267static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1268{
2941a486 1269 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1270 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1271 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1272
1273 tunnel->dev = dev;
1274 strcpy(tunnel->parms.name, dev->name);
1275
1276 iph->version = 4;
1277 iph->protocol = IPPROTO_GRE;
1278 iph->ihl = 5;
1279 tunnel->hlen = sizeof(struct iphdr) + 4;
1280
1281 dev_hold(dev);
eb8ce741 1282 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1283}
1284
1285
32613090 1286static const struct net_protocol ipgre_protocol = {
1da177e4
LT
1287 .handler = ipgre_rcv,
1288 .err_handler = ipgre_err,
f96c148f 1289 .netns_ok = 1,
1da177e4
LT
1290};
1291
eef6dd65 1292static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1293{
1294 int prio;
1295
1296 for (prio = 0; prio < 4; prio++) {
1297 int h;
1298 for (h = 0; h < HASH_SIZE; h++) {
eef6dd65
ED
1299 struct ip_tunnel *t = ign->tunnels[prio][h];
1300
1301 while (t != NULL) {
1302 unregister_netdevice_queue(t->dev, head);
1303 t = t->next;
1304 }
eb8ce741
PE
1305 }
1306 }
1307}
1308
2c8c1e72 1309static int __net_init ipgre_init_net(struct net *net)
59a4c759 1310{
cfb8fbf2 1311 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
59a4c759 1312 int err;
59a4c759 1313
7daa0004
PE
1314 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1315 ipgre_tunnel_setup);
1316 if (!ign->fb_tunnel_dev) {
1317 err = -ENOMEM;
1318 goto err_alloc_dev;
1319 }
be77e593 1320 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1321
b8c26a33 1322 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1323 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1324
1325 if ((err = register_netdev(ign->fb_tunnel_dev)))
1326 goto err_reg_dev;
1327
59a4c759
PE
1328 return 0;
1329
7daa0004
PE
1330err_reg_dev:
1331 free_netdev(ign->fb_tunnel_dev);
1332err_alloc_dev:
59a4c759
PE
1333 return err;
1334}
1335
2c8c1e72 1336static void __net_exit ipgre_exit_net(struct net *net)
59a4c759
PE
1337{
1338 struct ipgre_net *ign;
eef6dd65 1339 LIST_HEAD(list);
59a4c759
PE
1340
1341 ign = net_generic(net, ipgre_net_id);
7daa0004 1342 rtnl_lock();
eef6dd65
ED
1343 ipgre_destroy_tunnels(ign, &list);
1344 unregister_netdevice_many(&list);
7daa0004 1345 rtnl_unlock();
59a4c759
PE
1346}
1347
1348static struct pernet_operations ipgre_net_ops = {
1349 .init = ipgre_init_net,
1350 .exit = ipgre_exit_net,
cfb8fbf2
EB
1351 .id = &ipgre_net_id,
1352 .size = sizeof(struct ipgre_net),
59a4c759 1353};
1da177e4 1354
c19e654d
HX
1355static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1356{
1357 __be16 flags;
1358
1359 if (!data)
1360 return 0;
1361
1362 flags = 0;
1363 if (data[IFLA_GRE_IFLAGS])
1364 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1365 if (data[IFLA_GRE_OFLAGS])
1366 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367 if (flags & (GRE_VERSION|GRE_ROUTING))
1368 return -EINVAL;
1369
1370 return 0;
1371}
1372
e1a80002
HX
1373static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1374{
1375 __be32 daddr;
1376
1377 if (tb[IFLA_ADDRESS]) {
1378 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1379 return -EINVAL;
1380 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1381 return -EADDRNOTAVAIL;
1382 }
1383
1384 if (!data)
1385 goto out;
1386
1387 if (data[IFLA_GRE_REMOTE]) {
1388 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1389 if (!daddr)
1390 return -EINVAL;
1391 }
1392
1393out:
1394 return ipgre_tunnel_validate(tb, data);
1395}
1396
c19e654d
HX
1397static void ipgre_netlink_parms(struct nlattr *data[],
1398 struct ip_tunnel_parm *parms)
1399{
7bb82d92 1400 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1401
1402 parms->iph.protocol = IPPROTO_GRE;
1403
1404 if (!data)
1405 return;
1406
1407 if (data[IFLA_GRE_LINK])
1408 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1409
1410 if (data[IFLA_GRE_IFLAGS])
1411 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1412
1413 if (data[IFLA_GRE_OFLAGS])
1414 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1415
1416 if (data[IFLA_GRE_IKEY])
1417 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1418
1419 if (data[IFLA_GRE_OKEY])
1420 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1421
1422 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1423 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1424
1425 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1426 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1427
1428 if (data[IFLA_GRE_TTL])
1429 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1430
1431 if (data[IFLA_GRE_TOS])
1432 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1433
1434 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1435 parms->iph.frag_off = htons(IP_DF);
1436}
1437
e1a80002
HX
1438static int ipgre_tap_init(struct net_device *dev)
1439{
1440 struct ip_tunnel *tunnel;
1441
1442 tunnel = netdev_priv(dev);
1443
1444 tunnel->dev = dev;
1445 strcpy(tunnel->parms.name, dev->name);
1446
1447 ipgre_tunnel_bind_dev(dev);
1448
1449 return 0;
1450}
1451
b8c26a33
SH
1452static const struct net_device_ops ipgre_tap_netdev_ops = {
1453 .ndo_init = ipgre_tap_init,
1454 .ndo_uninit = ipgre_tunnel_uninit,
1455 .ndo_start_xmit = ipgre_tunnel_xmit,
1456 .ndo_set_mac_address = eth_mac_addr,
1457 .ndo_validate_addr = eth_validate_addr,
1458 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1459};
1460
e1a80002
HX
1461static void ipgre_tap_setup(struct net_device *dev)
1462{
1463
1464 ether_setup(dev);
1465
2e9526b3 1466 dev->netdev_ops = &ipgre_tap_netdev_ops;
e1a80002 1467 dev->destructor = free_netdev;
e1a80002
HX
1468
1469 dev->iflink = 0;
1470 dev->features |= NETIF_F_NETNS_LOCAL;
1471}
1472
81adee47 1473static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1474 struct nlattr *data[])
1475{
1476 struct ip_tunnel *nt;
1477 struct net *net = dev_net(dev);
1478 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1479 int mtu;
1480 int err;
1481
1482 nt = netdev_priv(dev);
1483 ipgre_netlink_parms(data, &nt->parms);
1484
e1a80002 1485 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1486 return -EEXIST;
1487
e1a80002
HX
1488 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1489 random_ether_addr(dev->dev_addr);
1490
c19e654d
HX
1491 mtu = ipgre_tunnel_bind_dev(dev);
1492 if (!tb[IFLA_MTU])
1493 dev->mtu = mtu;
1494
1495 err = register_netdevice(dev);
1496 if (err)
1497 goto out;
1498
1499 dev_hold(dev);
1500 ipgre_tunnel_link(ign, nt);
1501
1502out:
1503 return err;
1504}
1505
1506static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1507 struct nlattr *data[])
1508{
1509 struct ip_tunnel *t, *nt;
1510 struct net *net = dev_net(dev);
1511 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1512 struct ip_tunnel_parm p;
1513 int mtu;
1514
1515 if (dev == ign->fb_tunnel_dev)
1516 return -EINVAL;
1517
1518 nt = netdev_priv(dev);
1519 ipgre_netlink_parms(data, &p);
1520
1521 t = ipgre_tunnel_locate(net, &p, 0);
1522
1523 if (t) {
1524 if (t->dev != dev)
1525 return -EEXIST;
1526 } else {
c19e654d
HX
1527 t = nt;
1528
2e9526b3
HX
1529 if (dev->type != ARPHRD_ETHER) {
1530 unsigned nflags = 0;
c19e654d 1531
2e9526b3
HX
1532 if (ipv4_is_multicast(p.iph.daddr))
1533 nflags = IFF_BROADCAST;
1534 else if (p.iph.daddr)
1535 nflags = IFF_POINTOPOINT;
1536
1537 if ((dev->flags ^ nflags) &
1538 (IFF_POINTOPOINT | IFF_BROADCAST))
1539 return -EINVAL;
1540 }
c19e654d
HX
1541
1542 ipgre_tunnel_unlink(ign, t);
1543 t->parms.iph.saddr = p.iph.saddr;
1544 t->parms.iph.daddr = p.iph.daddr;
1545 t->parms.i_key = p.i_key;
2e9526b3
HX
1546 if (dev->type != ARPHRD_ETHER) {
1547 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1548 memcpy(dev->broadcast, &p.iph.daddr, 4);
1549 }
c19e654d
HX
1550 ipgre_tunnel_link(ign, t);
1551 netdev_state_change(dev);
1552 }
1553
1554 t->parms.o_key = p.o_key;
1555 t->parms.iph.ttl = p.iph.ttl;
1556 t->parms.iph.tos = p.iph.tos;
1557 t->parms.iph.frag_off = p.iph.frag_off;
1558
1559 if (t->parms.link != p.link) {
1560 t->parms.link = p.link;
1561 mtu = ipgre_tunnel_bind_dev(dev);
1562 if (!tb[IFLA_MTU])
1563 dev->mtu = mtu;
1564 netdev_state_change(dev);
1565 }
1566
1567 return 0;
1568}
1569
/*
 * rtnl_link_ops .get_size: total netlink attribute space needed for the
 * attributes emitted by ipgre_fill_info().  @dev is not consulted; the size
 * is the same for every device.
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	size_t total = 0;

	total += nla_total_size(4);	/* IFLA_GRE_LINK */
	total += nla_total_size(2);	/* IFLA_GRE_IFLAGS */
	total += nla_total_size(2);	/* IFLA_GRE_OFLAGS */
	total += nla_total_size(4);	/* IFLA_GRE_IKEY */
	total += nla_total_size(4);	/* IFLA_GRE_OKEY */
	total += nla_total_size(4);	/* IFLA_GRE_LOCAL */
	total += nla_total_size(4);	/* IFLA_GRE_REMOTE */
	total += nla_total_size(1);	/* IFLA_GRE_TTL */
	total += nla_total_size(1);	/* IFLA_GRE_TOS */
	total += nla_total_size(1);	/* IFLA_GRE_PMTUDISC */

	return total;
}
1595
1596static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1597{
1598 struct ip_tunnel *t = netdev_priv(dev);
1599 struct ip_tunnel_parm *p = &t->parms;
1600
1601 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1602 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1603 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1604 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1605 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1606 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1607 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1608 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1609 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1610 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1611
1612 return 0;
1613
1614nla_put_failure:
1615 return -EMSGSIZE;
1616}
1617
1618static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1619 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1620 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1621 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1622 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1623 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1624 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1625 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1626 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1627 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1628 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1629};
1630
1631static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1632 .kind = "gre",
1633 .maxtype = IFLA_GRE_MAX,
1634 .policy = ipgre_policy,
1635 .priv_size = sizeof(struct ip_tunnel),
1636 .setup = ipgre_tunnel_setup,
1637 .validate = ipgre_tunnel_validate,
1638 .newlink = ipgre_newlink,
1639 .changelink = ipgre_changelink,
1640 .get_size = ipgre_get_size,
1641 .fill_info = ipgre_fill_info,
1642};
1643
e1a80002
HX
1644static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1645 .kind = "gretap",
1646 .maxtype = IFLA_GRE_MAX,
1647 .policy = ipgre_policy,
1648 .priv_size = sizeof(struct ip_tunnel),
1649 .setup = ipgre_tap_setup,
1650 .validate = ipgre_tap_validate,
1651 .newlink = ipgre_newlink,
1652 .changelink = ipgre_changelink,
1653 .get_size = ipgre_get_size,
1654 .fill_info = ipgre_fill_info,
1655};
1656
1da177e4
LT
1657/*
1658 * And now the modules code and kernel interface.
1659 */
1660
1661static int __init ipgre_init(void)
1662{
1663 int err;
1664
1665 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1666
cfb8fbf2 1667 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1668 if (err < 0)
c2892f02
AD
1669 return err;
1670
1671 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1672 if (err < 0) {
1673 printk(KERN_INFO "ipgre init: can't add protocol\n");
1674 goto add_proto_failed;
1675 }
7daa0004 1676
c19e654d
HX
1677 err = rtnl_link_register(&ipgre_link_ops);
1678 if (err < 0)
1679 goto rtnl_link_failed;
1680
e1a80002
HX
1681 err = rtnl_link_register(&ipgre_tap_ops);
1682 if (err < 0)
1683 goto tap_ops_failed;
1684
c19e654d 1685out:
1da177e4 1686 return err;
c19e654d 1687
e1a80002
HX
1688tap_ops_failed:
1689 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1690rtnl_link_failed:
c19e654d 1691 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
c2892f02
AD
1692add_proto_failed:
1693 unregister_pernet_device(&ipgre_net_ops);
c19e654d 1694 goto out;
1da177e4
LT
1695}
1696
db44575f 1697static void __exit ipgre_fini(void)
1da177e4 1698{
e1a80002 1699 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1700 rtnl_link_unregister(&ipgre_link_ops);
1da177e4
LT
1701 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1702 printk(KERN_INFO "ipgre close: can't remove protocol\n");
c2892f02 1703 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1704}
1705
1706module_init(ipgre_init);
1707module_exit(ipgre_fini);
1708MODULE_LICENSE("GPL");
4d74f8ba
PM
1709MODULE_ALIAS_RTNL_LINK("gre");
1710MODULE_ALIAS_RTNL_LINK("gretap");