]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
qlcnic: dont assume NET_IP_ALIGN is 2
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4 16#include <linux/kernel.h>
5a0e3ad6 17#include <linux/slab.h>
1da177e4
LT
18#include <asm/uaccess.h>
19#include <linux/skbuff.h>
20#include <linux/netdevice.h>
21#include <linux/in.h>
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/if_arp.h>
25#include <linux/mroute.h>
26#include <linux/init.h>
27#include <linux/in6.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/netfilter_ipv4.h>
e1a80002 31#include <linux/etherdevice.h>
46f25dff 32#include <linux/if_ether.h>
1da177e4
LT
33
34#include <net/sock.h>
35#include <net/ip.h>
36#include <net/icmp.h>
37#include <net/protocol.h>
38#include <net/ipip.h>
39#include <net/arp.h>
40#include <net/checksum.h>
41#include <net/dsfield.h>
42#include <net/inet_ecn.h>
43#include <net/xfrm.h>
59a4c759
PE
44#include <net/net_namespace.h>
45#include <net/netns/generic.h>
c19e654d 46#include <net/rtnetlink.h>
1da177e4
LT
47
48#ifdef CONFIG_IPV6
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#endif
53
54/*
55 Problems & solutions
56 --------------------
57
58 1. The most important issue is detecting local dead loops.
59 They would cause complete host lockup in transmit, which
60 would be "resolved" by stack overflow or, if queueing is enabled,
61 with infinite looping in net_bh.
62
63 We cannot track such dead loops during route installation,
64 it is infeasible task. The most general solutions would be
65 to keep skb->encapsulation counter (sort of local ttl),
66 and silently drop packet when it expires. It is the best
67 solution, but it supposes maintaining a new variable in ALL
68 skb, even if no tunneling is used.
69
a43912ab 70 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
82 so that this problem would be solved and traceroute output
83 would be even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
91 One of them is to parse packet trying to detect inner encapsulation
92 made by our node. It is difficult or even impossible, especially,
93 taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
c19e654d 120static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
121static int ipgre_tunnel_init(struct net_device *dev);
122static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 123static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
124
125/* Fallback tunnel: no source, no destination, no key, no options */
126
eb8ce741
PE
127#define HASH_SIZE 16
128
f99189b1 129static int ipgre_net_id __read_mostly;
/*
 * Per-network-namespace GRE state, fetched elsewhere in this file with
 * net_generic(net, ipgre_net_id).
 */
59a4c759 130struct ipgre_net {
eb8ce741
PE
/* Four hash tables of HASH_SIZE buckets each, indexed by match priority. */
131 struct ip_tunnel *tunnels[4][HASH_SIZE];
132
/* Fallback tunnel device, returned by lookup when no configured tunnel matches. */
7daa0004 133 struct net_device *fb_tunnel_dev;
59a4c759
PE
134};
135
1da177e4
LT
136/* Tunnel hash table */
137
138/*
139 4 hash tables:
140
141 3: (remote,local)
142 2: (remote,*)
143 1: (*,local)
144 0: (*,*)
145
146 We require exact key match i.e. if a key is present in packet
147 it will match only tunnel with the same key; if it is not present,
148 it will match only keyless tunnel.
149
150 All keyless packets, if not matched by configured keyless tunnels,
151 will match fallback tunnel.
152 */
153
/*
 * Fold a 32-bit value (address or key) into a 4-bit bucket index by XOR-ing
 * it with itself shifted right by four bits and keeping the low nibble.
 */
d5a0a1e3 154#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
1da177e4 155
eb8ce741
PE
/* Named aliases for the four tables inside struct ipgre_net. */
156#define tunnels_r_l tunnels[3]
157#define tunnels_r tunnels[2]
158#define tunnels_l tunnels[1]
159#define tunnels_wc tunnels[0]
8d5b2c08
ED
160/*
161 * Locking : hash tables are protected by RCU and a spinlock
162 */
163static DEFINE_SPINLOCK(ipgre_lock);
1da177e4 164
8d5b2c08
ED
/*
 * Walk one hash chain using rcu_dereference().  The macro relies on a local
 * variable named "t" being declared by the caller.
 */
165#define for_each_ip_tunnel_rcu(start) \
166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1da177e4
LT
167
168/* Given src, dst and key, find appropriate for input tunnel. */
169
749c10f9 170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
171 __be32 remote, __be32 local,
172 __be32 key, __be16 gre_proto)
1da177e4 173{
749c10f9
TT
174 struct net *net = dev_net(dev);
175 int link = dev->ifindex;
1da177e4
LT
176 unsigned h0 = HASH(remote);
177 unsigned h1 = HASH(key);
afcf1242 178 struct ip_tunnel *t, *cand = NULL;
7daa0004 179 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
181 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 182 int score, cand_score = 4;
1da177e4 183
8d5b2c08 184 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
185 if (local != t->parms.iph.saddr ||
186 remote != t->parms.iph.daddr ||
187 key != t->parms.i_key ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (t->dev->type != ARPHRD_IPGRE &&
192 t->dev->type != dev_type)
193 continue;
194
afcf1242 195 score = 0;
749c10f9 196 if (t->parms.link != link)
afcf1242 197 score |= 1;
749c10f9 198 if (t->dev->type != dev_type)
afcf1242
TT
199 score |= 2;
200 if (score == 0)
749c10f9 201 return t;
afcf1242
TT
202
203 if (score < cand_score) {
204 cand = t;
205 cand_score = score;
206 }
1da177e4 207 }
e1a80002 208
8d5b2c08 209 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9
TT
210 if (remote != t->parms.iph.daddr ||
211 key != t->parms.i_key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->dev->type != ARPHRD_IPGRE &&
216 t->dev->type != dev_type)
217 continue;
218
afcf1242 219 score = 0;
749c10f9 220 if (t->parms.link != link)
afcf1242 221 score |= 1;
749c10f9 222 if (t->dev->type != dev_type)
afcf1242
TT
223 score |= 2;
224 if (score == 0)
749c10f9 225 return t;
afcf1242
TT
226
227 if (score < cand_score) {
228 cand = t;
229 cand_score = score;
230 }
1da177e4 231 }
e1a80002 232
8d5b2c08 233 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
234 if ((local != t->parms.iph.saddr &&
235 (local != t->parms.iph.daddr ||
236 !ipv4_is_multicast(local))) ||
237 key != t->parms.i_key ||
238 !(t->dev->flags & IFF_UP))
239 continue;
240
241 if (t->dev->type != ARPHRD_IPGRE &&
242 t->dev->type != dev_type)
243 continue;
244
afcf1242 245 score = 0;
749c10f9 246 if (t->parms.link != link)
afcf1242 247 score |= 1;
749c10f9 248 if (t->dev->type != dev_type)
afcf1242
TT
249 score |= 2;
250 if (score == 0)
749c10f9 251 return t;
afcf1242
TT
252
253 if (score < cand_score) {
254 cand = t;
255 cand_score = score;
256 }
1da177e4 257 }
e1a80002 258
8d5b2c08 259 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
260 if (t->parms.i_key != key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IPGRE &&
265 t->dev->type != dev_type)
266 continue;
267
afcf1242 268 score = 0;
749c10f9 269 if (t->parms.link != link)
afcf1242 270 score |= 1;
749c10f9 271 if (t->dev->type != dev_type)
afcf1242
TT
272 score |= 2;
273 if (score == 0)
749c10f9 274 return t;
afcf1242
TT
275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
1da177e4
LT
280 }
281
afcf1242
TT
282 if (cand != NULL)
283 return cand;
e1a80002 284
8d5b2c08
ED
285 dev = ign->fb_tunnel_dev;
286 if (dev->flags & IFF_UP)
287 return netdev_priv(dev);
749c10f9 288
1da177e4
LT
289 return NULL;
290}
291
f57e7d5a
PE
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms)
1da177e4 294{
5056a1ef
YH
295 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key;
1da177e4
LT
298 unsigned h = HASH(key);
299 int prio = 0;
300
301 if (local)
302 prio |= 1;
f97c1e0c 303 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
304 prio |= 2;
305 h ^= HASH(remote);
306 }
307
eb8ce741 308 return &ign->tunnels[prio][h];
1da177e4
LT
309}
310
f57e7d5a
PE
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t)
5056a1ef 313{
f57e7d5a 314 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
315}
316
f57e7d5a 317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 318{
f57e7d5a 319 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4 320
8d5b2c08 321 spin_lock_bh(&ipgre_lock);
1da177e4 322 t->next = *tp;
8d5b2c08
ED
323 rcu_assign_pointer(*tp, t);
324 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
325}
326
f57e7d5a 327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
328{
329 struct ip_tunnel **tp;
330
f57e7d5a 331 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4 332 if (t == *tp) {
8d5b2c08 333 spin_lock_bh(&ipgre_lock);
1da177e4 334 *tp = t->next;
8d5b2c08 335 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
336 break;
337 }
338 }
339}
340
e1a80002
HX
341static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
342 struct ip_tunnel_parm *parms,
343 int type)
1da177e4 344{
d5a0a1e3
AV
345 __be32 remote = parms->iph.daddr;
346 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key;
749c10f9 348 int link = parms->link;
e1a80002
HX
349 struct ip_tunnel *t, **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
353 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key &&
749c10f9 356 link == t->parms.link &&
e1a80002
HX
357 type == t->dev->type)
358 break;
359
360 return t;
361}
362
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
364 struct ip_tunnel_parm *parms, int create)
365{
366 struct ip_tunnel *t, *nt;
1da177e4 367 struct net_device *dev;
1da177e4 368 char name[IFNAMSIZ];
f57e7d5a 369 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 370
e1a80002
HX
371 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
372 if (t || !create)
373 return t;
1da177e4
LT
374
375 if (parms->name[0])
376 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
377 else
378 sprintf(name, "gre%%d");
1da177e4
LT
379
380 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
381 if (!dev)
382 return NULL;
383
0b67eceb
PE
384 dev_net_set(dev, net);
385
b37d428b
PE
386 if (strchr(name, '%')) {
387 if (dev_alloc_name(dev, name) < 0)
388 goto failed_free;
389 }
390
2941a486 391 nt = netdev_priv(dev);
1da177e4 392 nt->parms = *parms;
c19e654d 393 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 394
42aa9162
HX
395 dev->mtu = ipgre_tunnel_bind_dev(dev);
396
b37d428b
PE
397 if (register_netdevice(dev) < 0)
398 goto failed_free;
1da177e4 399
1da177e4 400 dev_hold(dev);
f57e7d5a 401 ipgre_tunnel_link(ign, nt);
1da177e4
LT
402 return nt;
403
b37d428b
PE
404failed_free:
405 free_netdev(dev);
1da177e4
LT
406 return NULL;
407}
408
409static void ipgre_tunnel_uninit(struct net_device *dev)
410{
f57e7d5a
PE
411 struct net *net = dev_net(dev);
412 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
413
414 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
415 dev_put(dev);
416}
417
418
419static void ipgre_err(struct sk_buff *skb, u32 info)
420{
1da177e4 421
071f92d0 422/* All the routers (except for Linux) return only
1da177e4
LT
423 8 bytes of packet payload. It means, that precise relaying of
424 ICMP in the real Internet is absolutely infeasible.
425
426 Moreover, Cisco "wise men" put GRE key to the third word
427 in GRE header. It makes impossible maintaining even soft state for keyed
428 GRE tunnels with enabled checksum. Tell them "thank you".
429
430 Well, I wonder, rfc1812 was written by Cisco employee,
431 what the hell these idiots break standrads established
432 by themself???
433 */
434
6ed2533e 435 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 436 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 437 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
438 const int type = icmp_hdr(skb)->type;
439 const int code = icmp_hdr(skb)->code;
1da177e4 440 struct ip_tunnel *t;
d5a0a1e3 441 __be16 flags;
1da177e4
LT
442
443 flags = p[0];
444 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
445 if (flags&(GRE_VERSION|GRE_ROUTING))
446 return;
447 if (flags&GRE_KEY) {
448 grehlen += 4;
449 if (flags&GRE_CSUM)
450 grehlen += 4;
451 }
452 }
453
454 /* If only 8 bytes returned, keyed message will be dropped here */
455 if (skb_headlen(skb) < grehlen)
456 return;
457
458 switch (type) {
459 default:
460 case ICMP_PARAMETERPROB:
461 return;
462
463 case ICMP_DEST_UNREACH:
464 switch (code) {
465 case ICMP_SR_FAILED:
466 case ICMP_PORT_UNREACH:
467 /* Impossible event. */
468 return;
469 case ICMP_FRAG_NEEDED:
470 /* Soft state for pmtu is maintained by IP core. */
471 return;
472 default:
473 /* All others are translated to HOST_UNREACH.
474 rfc2003 contains "deep thoughts" about NET_UNREACH,
475 I believe they are just ether pollution. --ANK
476 */
477 break;
478 }
479 break;
480 case ICMP_TIME_EXCEEDED:
481 if (code != ICMP_EXC_TTL)
482 return;
483 break;
484 }
485
8d5b2c08 486 rcu_read_lock();
749c10f9 487 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
488 flags & GRE_KEY ?
489 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
490 p[1]);
f97c1e0c
JP
491 if (t == NULL || t->parms.iph.daddr == 0 ||
492 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
493 goto out;
494
495 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
496 goto out;
497
da6185d8 498 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
499 t->err_count++;
500 else
501 t->err_count = 1;
502 t->err_time = jiffies;
503out:
8d5b2c08 504 rcu_read_unlock();
1da177e4
LT
505}
506
507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
508{
509 if (INET_ECN_is_ce(iph->tos)) {
510 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 511 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 512 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 513 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
514 }
515 }
516}
517
518static inline u8
519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
520{
521 u8 inner = 0;
522 if (skb->protocol == htons(ETH_P_IP))
523 inner = old_iph->tos;
524 else if (skb->protocol == htons(ETH_P_IPV6))
525 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
526 return INET_ECN_encapsulate(tos, inner);
527}
528
/*
 * Receive handler for GRE packets.
 *
 * Parses the GRE header found at skb->data, looks up the tunnel matching
 * the outer addresses, key and GRE protocol, validates checksum and
 * sequence-number policy against the tunnel's i_flags, strips the GRE
 * header and passes the inner packet to netif_rx() on the tunnel device.
 *
 * Always returns 0.  Packets that fail a check are freed; when no tunnel
 * matches at all, an ICMP port-unreachable is sent before the packet is
 * freed.
 */
529static int ipgre_rcv(struct sk_buff *skb)
530{
531 struct iphdr *iph;
532 u8 *h;
d5a0a1e3 533 __be16 flags;
d3bc23e7 534 __sum16 csum = 0;
d5a0a1e3 535 __be32 key = 0;
1da177e4
LT
536 u32 seqno = 0;
537 struct ip_tunnel *tunnel;
/* Running length of the GRE header; the 4-byte base header is always there. */
538 int offset = 4;
e1a80002 539 __be16 gre_proto;
1da177e4
LT
540
/* 16 = base header plus the three optional 4-byte words parsed below. */
541 if (!pskb_may_pull(skb, 16))
542 goto drop_nolock;
543
eddc9ec5 544 iph = ip_hdr(skb);
1da177e4 545 h = skb->data;
d5a0a1e3 546 flags = *(__be16*)h;
1da177e4
LT
547
548 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
549 /* - Version must be 0.
550 - We do not support routing headers.
551 */
552 if (flags&(GRE_VERSION|GRE_ROUTING))
553 goto drop_nolock;
554
555 if (flags&GRE_CSUM) {
fb286bb2 556 switch (skb->ip_summed) {
84fa7933 557 case CHECKSUM_COMPLETE:
d3bc23e7 558 csum = csum_fold(skb->csum);
fb286bb2
HX
559 if (!csum)
560 break;
561 /* fall through */
562 case CHECKSUM_NONE:
563 skb->csum = 0;
564 csum = __skb_checksum_complete(skb);
84fa7933 565 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
566 }
567 offset += 4;
568 }
569 if (flags&GRE_KEY) {
d5a0a1e3 570 key = *(__be32*)(h + offset);
1da177e4
LT
571 offset += 4;
572 }
573 if (flags&GRE_SEQ) {
d5a0a1e3 574 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
575 offset += 4;
576 }
577 }
578
e1a80002
HX
579 gre_proto = *(__be16 *)(h + 2);
580
8d5b2c08 581 rcu_read_lock();
749c10f9 582 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
583 iph->saddr, iph->daddr, key,
584 gre_proto))) {
addd68eb
PE
585 struct net_device_stats *stats = &tunnel->dev->stats;
586
1da177e4
LT
587 secpath_reset(skb);
588
e1a80002 589 skb->protocol = gre_proto;
1da177e4
LT
590 /* WCCP version 1 and 2 protocol decoding.
591 * - Change protocol to IP
592 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
593 */
e1a80002 594 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 595 skb->protocol = htons(ETH_P_IP);
/* High nibble 4 means an IPv4 header starts here; otherwise skip 4 more bytes. */
e905a9ed 596 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
597 offset += 4;
598 }
599
1d069167 600 skb->mac_header = skb->network_header;
4209fb60 601 __pskb_pull(skb, offset);
9c70220b 602 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
603 skb->pkt_type = PACKET_HOST;
604#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 605 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 606 /* Looped back packet, drop it! */
511c3f92 607 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 608 goto drop;
addd68eb 609 stats->multicast++;
1da177e4
LT
610 skb->pkt_type = PACKET_BROADCAST;
611 }
612#endif
613
/* Drop on a bad checksum, or when the tunnel requires one and none was sent. */
614 if (((flags&GRE_CSUM) && csum) ||
615 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
616 stats->rx_crc_errors++;
617 stats->rx_errors++;
1da177e4
LT
618 goto drop;
619 }
620 if (tunnel->parms.i_flags&GRE_SEQ) {
/* Signed difference: a sequence number behind the expected one is rejected. */
621 if (!(flags&GRE_SEQ) ||
622 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
623 stats->rx_fifo_errors++;
624 stats->rx_errors++;
1da177e4
LT
625 goto drop;
626 }
627 tunnel->i_seqno = seqno + 1;
628 }
e1a80002
HX
629
630 /* Warning: All skb pointers will be invalidated! */
631 if (tunnel->dev->type == ARPHRD_ETHER) {
632 if (!pskb_may_pull(skb, ETH_HLEN)) {
633 stats->rx_length_errors++;
634 stats->rx_errors++;
635 goto drop;
636 }
637
/* Reload iph: the pull above may have changed the header location. */
638 iph = ip_hdr(skb);
639 skb->protocol = eth_type_trans(skb, tunnel->dev);
640 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
641 }
642
d19d56dd 643 skb_tunnel_rx(skb, tunnel->dev);
e1a80002
HX
644
645 skb_reset_network_header(skb);
1da177e4 646 ipgre_ecn_decapsulate(iph, skb);
e1a80002 647
1da177e4 648 netif_rx(skb);
8d5b2c08 649 rcu_read_unlock();
1da177e4
LT
650 return(0);
651 }
/* No tunnel matched: report port unreachable, then fall through to drop. */
45af08be 652 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
653
654drop:
8d5b2c08 655 rcu_read_unlock();
1da177e4
LT
656drop_nolock:
657 kfree_skb(skb);
658 return(0);
659}
660
/*
 * Transmit handler: wrap @skb in an outer IPv4 header plus GRE header and
 * send it towards the tunnel's remote endpoint.
 *
 * Steps, in order: pick the outer header template and destination (for
 * NBMA tunnels the destination is derived from the inner packet's route or
 * neighbour), route the outer packet, apply path-MTU handling, make sure
 * there is enough headroom, then build the IP and GRE headers and hand the
 * packet to IPTUNNEL_XMIT().
 *
 * Always returns NETDEV_TX_OK.  On every failure path the skb is freed and
 * an error/drop counter is incremented.
 */
6fef4c0c 661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 662{
2941a486 663 struct ip_tunnel *tunnel = netdev_priv(dev);
0bfbedb1
ED
664 struct net_device_stats *stats = &dev->stats;
665 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
eddc9ec5 666 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
667 struct iphdr *tiph;
668 u8 tos;
d5a0a1e3 669 __be16 df;
1da177e4
LT
670 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */
c2636b4d 673 unsigned int max_headroom; /* The extra header space needed */
1da177e4 674 int gre_hlen;
d5a0a1e3 675 __be32 dst;
1da177e4
LT
676 int mtu;
677
e1a80002
HX
678 if (dev->type == ARPHRD_ETHER)
679 IPCB(skb)->flags = 0;
680
/*
 * With header_ops on an ARPHRD_IPGRE device the outer header template is
 * read from the start of the skb and no extra GRE space is pushed later;
 * otherwise the template is the tunnel's configured header.
 */
681 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 682 gre_hlen = 0;
6ed2533e 683 tiph = (struct iphdr *)skb->data;
1da177e4
LT
684 } else {
685 gre_hlen = tunnel->hlen;
686 tiph = &tunnel->parms.iph;
687 }
688
689 if ((dst = tiph->daddr) == 0) {
690 /* NBMA tunnel */
691
adf30907 692 if (skb_dst(skb) == NULL) {
addd68eb 693 stats->tx_fifo_errors++;
1da177e4
LT
694 goto tx_error;
695 }
696
697 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 698 rt = skb_rtable(skb);
1da177e4
LT
699 if ((dst = rt->rt_gateway) == 0)
700 goto tx_error_icmp;
701 }
702#ifdef CONFIG_IPV6
703 else if (skb->protocol == htons(ETH_P_IPV6)) {
704 struct in6_addr *addr6;
705 int addr_type;
adf30907 706 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
707
708 if (neigh == NULL)
709 goto tx_error;
710
6ed2533e 711 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
712 addr_type = ipv6_addr_type(addr6);
713
714 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 715 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
716 addr_type = ipv6_addr_type(addr6);
717 }
718
719 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
720 goto tx_error_icmp;
721
/* The last 32 bits of the IPv6 address are used as the IPv4 destination. */
722 dst = addr6->s6_addr32[3];
723 }
724#endif
725 else
726 goto tx_error;
727 }
728
729 tos = tiph->tos;
/* A template TOS of exactly 1 means: take the TOS from the inner packet. */
ee686ca9
AJ
730 if (tos == 1) {
731 tos = 0;
1da177e4
LT
732 if (skb->protocol == htons(ETH_P_IP))
733 tos = old_iph->tos;
dd4ba83d
SH
734 else if (skb->protocol == htons(ETH_P_IPV6))
735 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
1da177e4
LT
736 }
737
738 {
739 struct flowi fl = { .oif = tunnel->parms.link,
740 .nl_u = { .ip4_u =
741 { .daddr = dst,
742 .saddr = tiph->saddr,
743 .tos = RT_TOS(tos) } },
744 .proto = IPPROTO_GRE };
96635522 745 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 746 stats->tx_carrier_errors++;
1da177e4
LT
747 goto tx_error;
748 }
749 }
/* From here on rt holds a reference; error paths release it with ip_rt_put(). */
d8d1f30b 750 tdev = rt->dst.dev;
1da177e4
LT
751
/* The outer route points back at this tunnel device: refuse to loop. */
752 if (tdev == dev) {
753 ip_rt_put(rt);
addd68eb 754 stats->collisions++;
1da177e4
LT
755 goto tx_error;
756 }
757
758 df = tiph->frag_off;
759 if (df)
d8d1f30b 760 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 761 else
adf30907 762 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 763
adf30907
ED
764 if (skb_dst(skb))
765 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
766
767 if (skb->protocol == htons(ETH_P_IP)) {
768 df |= (old_iph->frag_off&htons(IP_DF));
769
770 if ((old_iph->frag_off&htons(IP_DF)) &&
771 mtu < ntohs(old_iph->tot_len)) {
772 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
773 ip_rt_put(rt);
774 goto tx_error;
775 }
776 }
777#ifdef CONFIG_IPV6
778 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 779 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 780
adf30907 781 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
782 if ((tunnel->parms.iph.daddr &&
783 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
784 rt6->rt6i_dst.plen == 128) {
785 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 786 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
787 }
788 }
789
790 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
3ffe533c 791 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1da177e4
LT
792 ip_rt_put(rt);
793 goto tx_error;
794 }
795 }
796#endif
797
/*
 * err_count is raised by ipgre_err(); while it is non-zero and recent,
 * each transmitted packet reports one link failure to the inner sender.
 */
798 if (tunnel->err_count > 0) {
da6185d8
WY
799 if (time_before(jiffies,
800 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
801 tunnel->err_count--;
802
803 dst_link_failure(skb);
804 } else
805 tunnel->err_count = 0;
806 }
807
d8d1f30b 808 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
1da177e4 809
/* Reallocate when headroom is short or the skb cannot be written in place. */
cfbba49d
PM
810 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
811 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4 812 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
243aad83
TT
813 if (max_headroom > dev->needed_headroom)
814 dev->needed_headroom = max_headroom;
1da177e4
LT
815 if (!new_skb) {
816 ip_rt_put(rt);
0bfbedb1 817 txq->tx_dropped++;
1da177e4 818 dev_kfree_skb(skb);
6ed10654 819 return NETDEV_TX_OK;
1da177e4
LT
820 }
821 if (skb->sk)
822 skb_set_owner_w(new_skb, skb->sk);
823 dev_kfree_skb(skb);
824 skb = new_skb;
/* The inner header now lives in the new buffer. */
eddc9ec5 825 old_iph = ip_hdr(skb);
1da177e4
LT
826 }
827
64194c31 828 skb_reset_transport_header(skb);
e2d1bca7
ACM
829 skb_push(skb, gre_hlen);
830 skb_reset_network_header(skb);
1da177e4 831 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
832 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
833 IPSKB_REROUTED);
adf30907 834 skb_dst_drop(skb);
d8d1f30b 835 skb_dst_set(skb, &rt->dst);
1da177e4
LT
836
837 /*
838 * Push down and install the IPIP header.
839 */
840
eddc9ec5 841 iph = ip_hdr(skb);
1da177e4
LT
842 iph->version = 4;
843 iph->ihl = sizeof(struct iphdr) >> 2;
844 iph->frag_off = df;
845 iph->protocol = IPPROTO_GRE;
846 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
847 iph->daddr = rt->rt_dst;
848 iph->saddr = rt->rt_src;
849
/* A template TTL of 0 means: inherit from the inner packet, else use the route's hop limit. */
850 if ((iph->ttl = tiph->ttl) == 0) {
851 if (skb->protocol == htons(ETH_P_IP))
852 iph->ttl = old_iph->ttl;
853#ifdef CONFIG_IPV6
854 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 855 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
856#endif
857 else
d8d1f30b 858 iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
1da177e4
LT
859 }
860
/* GRE base header: flags word, then protocol type. */
e1a80002
HX
861 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
862 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
863 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
864
865 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
/* Optional words are filled from the last one backwards: sequence, key, checksum. */
d5a0a1e3 866 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
867
868 if (tunnel->parms.o_flags&GRE_SEQ) {
869 ++tunnel->o_seqno;
870 *ptr = htonl(tunnel->o_seqno);
871 ptr--;
872 }
873 if (tunnel->parms.o_flags&GRE_KEY) {
874 *ptr = tunnel->parms.o_key;
875 ptr--;
876 }
877 if (tunnel->parms.o_flags&GRE_CSUM) {
/* Zero the checksum word first so it does not contribute to its own sum. */
878 *ptr = 0;
5f92a738 879 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
880 }
881 }
882
883 nf_reset(skb);
884
885 IPTUNNEL_XMIT();
6ed10654 886 return NETDEV_TX_OK;
1da177e4
LT
887
888tx_error_icmp:
889 dst_link_failure(skb);
890
891tx_error:
892 stats->tx_errors++;
893 dev_kfree_skb(skb);
6ed10654 894 return NETDEV_TX_OK;
1da177e4
LT
895}
896
42aa9162 897static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
898{
899 struct net_device *tdev = NULL;
900 struct ip_tunnel *tunnel;
901 struct iphdr *iph;
902 int hlen = LL_MAX_HEADER;
903 int mtu = ETH_DATA_LEN;
904 int addend = sizeof(struct iphdr) + 4;
905
906 tunnel = netdev_priv(dev);
907 iph = &tunnel->parms.iph;
908
c95b819a 909 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
910
911 if (iph->daddr) {
912 struct flowi fl = { .oif = tunnel->parms.link,
913 .nl_u = { .ip4_u =
914 { .daddr = iph->daddr,
915 .saddr = iph->saddr,
916 .tos = RT_TOS(iph->tos) } },
917 .proto = IPPROTO_GRE };
918 struct rtable *rt;
96635522 919 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
d8d1f30b 920 tdev = rt->dst.dev;
ee34c1eb
MS
921 ip_rt_put(rt);
922 }
e1a80002
HX
923
924 if (dev->type != ARPHRD_ETHER)
925 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
926 }
927
928 if (!tdev && tunnel->parms.link)
96635522 929 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
930
931 if (tdev) {
c95b819a 932 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
933 mtu = tdev->mtu;
934 }
935 dev->iflink = tunnel->parms.link;
936
937 /* Precalculate GRE options length */
938 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
939 if (tunnel->parms.o_flags&GRE_CSUM)
940 addend += 4;
941 if (tunnel->parms.o_flags&GRE_KEY)
942 addend += 4;
943 if (tunnel->parms.o_flags&GRE_SEQ)
944 addend += 4;
945 }
c95b819a 946 dev->needed_headroom = addend + hlen;
8cdb0456 947 mtu -= dev->hard_header_len + addend;
42aa9162
HX
948
949 if (mtu < 68)
950 mtu = 68;
951
ee34c1eb
MS
952 tunnel->hlen = addend;
953
42aa9162 954 return mtu;
ee34c1eb
MS
955}
956
1da177e4
LT
/*
 * Tunnel configuration ioctl handler.
 *
 * Handles SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL,
 * exchanging a struct ip_tunnel_parm with user space through
 * ifr->ifr_ifru.ifru_data.  When issued on the fallback device, the
 * parameters select which tunnel to act on; otherwise @dev itself is the
 * target.  Add/change/delete require CAP_NET_ADMIN.
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EPERM, -EINVAL,
 * -EEXIST, -ENOBUFS, -ENOENT); any other command yields -EINVAL.
 */
957static int
958ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
959{
960 int err = 0;
961 struct ip_tunnel_parm p;
962 struct ip_tunnel *t;
f57e7d5a
PE
963 struct net *net = dev_net(dev);
964 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
965
966 switch (cmd) {
967 case SIOCGETTUNNEL:
968 t = NULL;
7daa0004 969 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
970 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
971 err = -EFAULT;
972 break;
973 }
f57e7d5a 974 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
975 }
/* Not found, or not asked via the fallback device: report dev's own parameters. */
976 if (t == NULL)
2941a486 977 t = netdev_priv(dev);
1da177e4
LT
978 memcpy(&p, &t->parms, sizeof(p));
979 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
980 err = -EFAULT;
981 break;
982
983 case SIOCADDTUNNEL:
984 case SIOCCHGTUNNEL:
985 err = -EPERM;
986 if (!capable(CAP_NET_ADMIN))
987 goto done;
988
989 err = -EFAULT;
990 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
991 goto done;
992
/* Sanity-check the template header: plain IPv4/GRE, only DF allowed in frag_off. */
993 err = -EINVAL;
994 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
995 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
996 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
997 goto done;
/* A fixed TTL forces DF on the outer header. */
998 if (p.iph.ttl)
999 p.iph.frag_off |= htons(IP_DF);
1000
/* Keys are only meaningful when the matching GRE_KEY flag is set. */
1001 if (!(p.i_flags&GRE_KEY))
1002 p.i_key = 0;
1003 if (!(p.o_flags&GRE_KEY))
1004 p.o_key = 0;
1005
f57e7d5a 1006 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1007
7daa0004 1008 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1009 if (t != NULL) {
/* The new parameters already describe a different tunnel. */
1010 if (t->dev != dev) {
1011 err = -EEXIST;
1012 break;
1013 }
1014 } else {
6ed2533e 1015 unsigned nflags = 0;
1da177e4 1016
2941a486 1017 t = netdev_priv(dev);
1da177e4 1018
f97c1e0c 1019 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1020 nflags = IFF_BROADCAST;
1021 else if (p.iph.daddr)
1022 nflags = IFF_POINTOPOINT;
1023
/* The change may not switch the device between point-to-point and broadcast. */
1024 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1025 err = -EINVAL;
1026 break;
1027 }
/* Addresses and key decide the hash bucket, so re-hash around the update. */
f57e7d5a 1028 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1029 t->parms.iph.saddr = p.iph.saddr;
1030 t->parms.iph.daddr = p.iph.daddr;
1031 t->parms.i_key = p.i_key;
1032 t->parms.o_key = p.o_key;
1033 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1034 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1035 ipgre_tunnel_link(ign, t);
1da177e4
LT
1036 netdev_state_change(dev);
1037 }
1038 }
1039
1040 if (t) {
1041 err = 0;
1042 if (cmd == SIOCCHGTUNNEL) {
1043 t->parms.iph.ttl = p.iph.ttl;
1044 t->parms.iph.tos = p.iph.tos;
1045 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1046 if (t->parms.link != p.link) {
1047 t->parms.link = p.link;
/*
 * NOTE(review): this rebinds "dev", not "t->dev".  When the ioctl is
 * issued on the fallback device the two can differ - confirm intended.
 */
42aa9162 1048 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1049 netdev_state_change(dev);
1050 }
1da177e4
LT
1051 }
1052 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1053 err = -EFAULT;
1054 } else
1055 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1056 break;
1057
1058 case SIOCDELTUNNEL:
1059 err = -EPERM;
1060 if (!capable(CAP_NET_ADMIN))
1061 goto done;
1062
7daa0004 1063 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1064 err = -EFAULT;
1065 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1066 goto done;
1067 err = -ENOENT;
f57e7d5a 1068 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1069 goto done;
/* The fallback tunnel itself cannot be deleted this way. */
1070 err = -EPERM;
7daa0004 1071 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1072 goto done;
1073 dev = t->dev;
1074 }
22f8cde5
SH
1075 unregister_netdevice(dev);
1076 err = 0;
1da177e4
LT
1077 break;
1078
1079 default:
1080 err = -EINVAL;
1081 }
1082
1083done:
1084 return err;
1085}
1da177e4
LT
1087static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1088{
2941a486 1089 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1090 if (new_mtu < 68 ||
1091 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1092 return -EINVAL;
1093 dev->mtu = new_mtu;
1094 return 0;
1095}
1096
1da177e4
LT
1097/* Nice toy. Unfortunately, useless in real life :-)
1098 It allows one to construct a virtual multiprotocol broadcast "LAN"
1099 over the Internet, provided multicast routing is tuned.
1100
1101
1102 I have no idea whether this bicycle was invented before me,
1103 so I had to set ARPHRD_IPGRE to a random value.
1104 I have the impression that Cisco could have made something similar,
1105 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1106
1da177e4
LT
1107 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1108 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1109
1110 ping -t 255 224.66.66.66
1111
1112 If nobody answers, mbone does not work.
1113
1114 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1115 ip addr add 10.66.66.<somewhat>/24 dev Universe
1116 ifconfig Universe up
1117 ifconfig Universe add fe80::<Your_real_addr>/10
1118 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1119 ftp 10.66.66.66
1120 ...
1121 ftp fec0:6666:6666::193.233.7.65
1122 ...
1123
1124 */
1125
3b04ddde
SH
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127 unsigned short type,
1128 const void *daddr, const void *saddr, unsigned len)
1da177e4 1129{
2941a486 1130 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1131 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1132 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1133
1134 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1135 p[0] = t->parms.o_flags;
1136 p[1] = htons(type);
1137
1138 /*
e905a9ed 1139 * Set the source hardware address.
1da177e4 1140 */
e905a9ed 1141
1da177e4
LT
1142 if (saddr)
1143 memcpy(&iph->saddr, saddr, 4);
6d55cb91 1144 if (daddr)
1da177e4 1145 memcpy(&iph->daddr, daddr, 4);
6d55cb91 1146 if (iph->daddr)
1da177e4 1147 return t->hlen;
e905a9ed 1148
1da177e4
LT
1149 return -t->hlen;
1150}
1151
6a5f44d7
TT
1152static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1153{
6ed2533e 1154 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1155 memcpy(haddr, &iph->saddr, 4);
1156 return 4;
1157}
1158
3b04ddde
SH
1159static const struct header_ops ipgre_header_ops = {
1160 .create = ipgre_header,
6a5f44d7 1161 .parse = ipgre_header_parse,
3b04ddde
SH
1162};
1163
6a5f44d7 1164#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1165static int ipgre_open(struct net_device *dev)
1166{
2941a486 1167 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1168
f97c1e0c 1169 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1170 struct flowi fl = { .oif = t->parms.link,
1171 .nl_u = { .ip4_u =
1172 { .daddr = t->parms.iph.daddr,
1173 .saddr = t->parms.iph.saddr,
1174 .tos = RT_TOS(t->parms.iph.tos) } },
1175 .proto = IPPROTO_GRE };
1176 struct rtable *rt;
96635522 1177 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4 1178 return -EADDRNOTAVAIL;
d8d1f30b 1179 dev = rt->dst.dev;
1da177e4 1180 ip_rt_put(rt);
e5ed6399 1181 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1182 return -EADDRNOTAVAIL;
1183 t->mlink = dev->ifindex;
e5ed6399 1184 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1185 }
1186 return 0;
1187}
1188
1189static int ipgre_close(struct net_device *dev)
1190{
2941a486 1191 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1192
f97c1e0c 1193 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1194 struct in_device *in_dev;
c346dca1 1195 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1196 if (in_dev) {
1197 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1198 in_dev_put(in_dev);
1199 }
1200 }
1201 return 0;
1202}
1203
1204#endif
1205
b8c26a33
SH
1206static const struct net_device_ops ipgre_netdev_ops = {
1207 .ndo_init = ipgre_tunnel_init,
1208 .ndo_uninit = ipgre_tunnel_uninit,
1209#ifdef CONFIG_NET_IPGRE_BROADCAST
1210 .ndo_open = ipgre_open,
1211 .ndo_stop = ipgre_close,
1212#endif
1213 .ndo_start_xmit = ipgre_tunnel_xmit,
1214 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1215 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1216};
1217
1da177e4
LT
1218static void ipgre_tunnel_setup(struct net_device *dev)
1219{
b8c26a33 1220 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1221 dev->destructor = free_netdev;
1da177e4
LT
1222
1223 dev->type = ARPHRD_IPGRE;
c95b819a 1224 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1225 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1226 dev->flags = IFF_NOARP;
1227 dev->iflink = 0;
1228 dev->addr_len = 4;
0b67eceb 1229 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1230 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1231}
1232
1233static int ipgre_tunnel_init(struct net_device *dev)
1234{
1da177e4
LT
1235 struct ip_tunnel *tunnel;
1236 struct iphdr *iph;
1da177e4 1237
2941a486 1238 tunnel = netdev_priv(dev);
1da177e4
LT
1239 iph = &tunnel->parms.iph;
1240
1241 tunnel->dev = dev;
1242 strcpy(tunnel->parms.name, dev->name);
1243
1244 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1245 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1246
1da177e4 1247 if (iph->daddr) {
1da177e4 1248#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1249 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1250 if (!iph->saddr)
1251 return -EINVAL;
1252 dev->flags = IFF_BROADCAST;
3b04ddde 1253 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1254 }
1255#endif
ee34c1eb 1256 } else
6a5f44d7 1257 dev->header_ops = &ipgre_header_ops;
1da177e4 1258
1da177e4
LT
1259 return 0;
1260}
1261
b8c26a33 1262static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1263{
2941a486 1264 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1265 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1266 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1267
1268 tunnel->dev = dev;
1269 strcpy(tunnel->parms.name, dev->name);
1270
1271 iph->version = 4;
1272 iph->protocol = IPPROTO_GRE;
1273 iph->ihl = 5;
1274 tunnel->hlen = sizeof(struct iphdr) + 4;
1275
1276 dev_hold(dev);
eb8ce741 1277 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1278}
1279
1280
32613090 1281static const struct net_protocol ipgre_protocol = {
1da177e4
LT
1282 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err,
f96c148f 1284 .netns_ok = 1,
1da177e4
LT
1285};
1286
eef6dd65 1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1288{
1289 int prio;
1290
1291 for (prio = 0; prio < 4; prio++) {
1292 int h;
1293 for (h = 0; h < HASH_SIZE; h++) {
eef6dd65
ED
1294 struct ip_tunnel *t = ign->tunnels[prio][h];
1295
1296 while (t != NULL) {
1297 unregister_netdevice_queue(t->dev, head);
1298 t = t->next;
1299 }
eb8ce741
PE
1300 }
1301 }
1302}
1303
2c8c1e72 1304static int __net_init ipgre_init_net(struct net *net)
59a4c759 1305{
cfb8fbf2 1306 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
59a4c759 1307 int err;
59a4c759 1308
7daa0004
PE
1309 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1310 ipgre_tunnel_setup);
1311 if (!ign->fb_tunnel_dev) {
1312 err = -ENOMEM;
1313 goto err_alloc_dev;
1314 }
be77e593 1315 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1316
b8c26a33 1317 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1318 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1319
1320 if ((err = register_netdev(ign->fb_tunnel_dev)))
1321 goto err_reg_dev;
1322
59a4c759
PE
1323 return 0;
1324
7daa0004
PE
1325err_reg_dev:
1326 free_netdev(ign->fb_tunnel_dev);
1327err_alloc_dev:
59a4c759
PE
1328 return err;
1329}
1330
2c8c1e72 1331static void __net_exit ipgre_exit_net(struct net *net)
59a4c759
PE
1332{
1333 struct ipgre_net *ign;
eef6dd65 1334 LIST_HEAD(list);
59a4c759
PE
1335
1336 ign = net_generic(net, ipgre_net_id);
7daa0004 1337 rtnl_lock();
eef6dd65
ED
1338 ipgre_destroy_tunnels(ign, &list);
1339 unregister_netdevice_many(&list);
7daa0004 1340 rtnl_unlock();
59a4c759
PE
1341}
1342
1343static struct pernet_operations ipgre_net_ops = {
1344 .init = ipgre_init_net,
1345 .exit = ipgre_exit_net,
cfb8fbf2
EB
1346 .id = &ipgre_net_id,
1347 .size = sizeof(struct ipgre_net),
59a4c759 1348};
1da177e4 1349
c19e654d
HX
1350static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1351{
1352 __be16 flags;
1353
1354 if (!data)
1355 return 0;
1356
1357 flags = 0;
1358 if (data[IFLA_GRE_IFLAGS])
1359 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1360 if (data[IFLA_GRE_OFLAGS])
1361 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1362 if (flags & (GRE_VERSION|GRE_ROUTING))
1363 return -EINVAL;
1364
1365 return 0;
1366}
1367
e1a80002
HX
1368static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1369{
1370 __be32 daddr;
1371
1372 if (tb[IFLA_ADDRESS]) {
1373 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1374 return -EINVAL;
1375 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1376 return -EADDRNOTAVAIL;
1377 }
1378
1379 if (!data)
1380 goto out;
1381
1382 if (data[IFLA_GRE_REMOTE]) {
1383 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1384 if (!daddr)
1385 return -EINVAL;
1386 }
1387
1388out:
1389 return ipgre_tunnel_validate(tb, data);
1390}
1391
c19e654d
HX
1392static void ipgre_netlink_parms(struct nlattr *data[],
1393 struct ip_tunnel_parm *parms)
1394{
7bb82d92 1395 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1396
1397 parms->iph.protocol = IPPROTO_GRE;
1398
1399 if (!data)
1400 return;
1401
1402 if (data[IFLA_GRE_LINK])
1403 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1404
1405 if (data[IFLA_GRE_IFLAGS])
1406 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1407
1408 if (data[IFLA_GRE_OFLAGS])
1409 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1410
1411 if (data[IFLA_GRE_IKEY])
1412 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1413
1414 if (data[IFLA_GRE_OKEY])
1415 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1416
1417 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1418 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1419
1420 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1421 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1422
1423 if (data[IFLA_GRE_TTL])
1424 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1425
1426 if (data[IFLA_GRE_TOS])
1427 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1428
1429 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1430 parms->iph.frag_off = htons(IP_DF);
1431}
1432
e1a80002
HX
1433static int ipgre_tap_init(struct net_device *dev)
1434{
1435 struct ip_tunnel *tunnel;
1436
1437 tunnel = netdev_priv(dev);
1438
1439 tunnel->dev = dev;
1440 strcpy(tunnel->parms.name, dev->name);
1441
1442 ipgre_tunnel_bind_dev(dev);
1443
1444 return 0;
1445}
1446
b8c26a33
SH
1447static const struct net_device_ops ipgre_tap_netdev_ops = {
1448 .ndo_init = ipgre_tap_init,
1449 .ndo_uninit = ipgre_tunnel_uninit,
1450 .ndo_start_xmit = ipgre_tunnel_xmit,
1451 .ndo_set_mac_address = eth_mac_addr,
1452 .ndo_validate_addr = eth_validate_addr,
1453 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1454};
1455
e1a80002
HX
1456static void ipgre_tap_setup(struct net_device *dev)
1457{
1458
1459 ether_setup(dev);
1460
2e9526b3 1461 dev->netdev_ops = &ipgre_tap_netdev_ops;
e1a80002 1462 dev->destructor = free_netdev;
e1a80002
HX
1463
1464 dev->iflink = 0;
1465 dev->features |= NETIF_F_NETNS_LOCAL;
1466}
1467
81adee47 1468static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1469 struct nlattr *data[])
1470{
1471 struct ip_tunnel *nt;
1472 struct net *net = dev_net(dev);
1473 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1474 int mtu;
1475 int err;
1476
1477 nt = netdev_priv(dev);
1478 ipgre_netlink_parms(data, &nt->parms);
1479
e1a80002 1480 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1481 return -EEXIST;
1482
e1a80002
HX
1483 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1484 random_ether_addr(dev->dev_addr);
1485
c19e654d
HX
1486 mtu = ipgre_tunnel_bind_dev(dev);
1487 if (!tb[IFLA_MTU])
1488 dev->mtu = mtu;
1489
1490 err = register_netdevice(dev);
1491 if (err)
1492 goto out;
1493
1494 dev_hold(dev);
1495 ipgre_tunnel_link(ign, nt);
1496
1497out:
1498 return err;
1499}
1500
1501static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1502 struct nlattr *data[])
1503{
1504 struct ip_tunnel *t, *nt;
1505 struct net *net = dev_net(dev);
1506 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1507 struct ip_tunnel_parm p;
1508 int mtu;
1509
1510 if (dev == ign->fb_tunnel_dev)
1511 return -EINVAL;
1512
1513 nt = netdev_priv(dev);
1514 ipgre_netlink_parms(data, &p);
1515
1516 t = ipgre_tunnel_locate(net, &p, 0);
1517
1518 if (t) {
1519 if (t->dev != dev)
1520 return -EEXIST;
1521 } else {
c19e654d
HX
1522 t = nt;
1523
2e9526b3
HX
1524 if (dev->type != ARPHRD_ETHER) {
1525 unsigned nflags = 0;
c19e654d 1526
2e9526b3
HX
1527 if (ipv4_is_multicast(p.iph.daddr))
1528 nflags = IFF_BROADCAST;
1529 else if (p.iph.daddr)
1530 nflags = IFF_POINTOPOINT;
1531
1532 if ((dev->flags ^ nflags) &
1533 (IFF_POINTOPOINT | IFF_BROADCAST))
1534 return -EINVAL;
1535 }
c19e654d
HX
1536
1537 ipgre_tunnel_unlink(ign, t);
1538 t->parms.iph.saddr = p.iph.saddr;
1539 t->parms.iph.daddr = p.iph.daddr;
1540 t->parms.i_key = p.i_key;
2e9526b3
HX
1541 if (dev->type != ARPHRD_ETHER) {
1542 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1543 memcpy(dev->broadcast, &p.iph.daddr, 4);
1544 }
c19e654d
HX
1545 ipgre_tunnel_link(ign, t);
1546 netdev_state_change(dev);
1547 }
1548
1549 t->parms.o_key = p.o_key;
1550 t->parms.iph.ttl = p.iph.ttl;
1551 t->parms.iph.tos = p.iph.tos;
1552 t->parms.iph.frag_off = p.iph.frag_off;
1553
1554 if (t->parms.link != p.link) {
1555 t->parms.link = p.link;
1556 mtu = ipgre_tunnel_bind_dev(dev);
1557 if (!tb[IFLA_MTU])
1558 dev->mtu = mtu;
1559 netdev_state_change(dev);
1560 }
1561
1562 return 0;
1563}
1564
1565static size_t ipgre_get_size(const struct net_device *dev)
1566{
1567 return
1568 /* IFLA_GRE_LINK */
1569 nla_total_size(4) +
1570 /* IFLA_GRE_IFLAGS */
1571 nla_total_size(2) +
1572 /* IFLA_GRE_OFLAGS */
1573 nla_total_size(2) +
1574 /* IFLA_GRE_IKEY */
1575 nla_total_size(4) +
1576 /* IFLA_GRE_OKEY */
1577 nla_total_size(4) +
1578 /* IFLA_GRE_LOCAL */
1579 nla_total_size(4) +
1580 /* IFLA_GRE_REMOTE */
1581 nla_total_size(4) +
1582 /* IFLA_GRE_TTL */
1583 nla_total_size(1) +
1584 /* IFLA_GRE_TOS */
1585 nla_total_size(1) +
1586 /* IFLA_GRE_PMTUDISC */
1587 nla_total_size(1) +
1588 0;
1589}
1590
1591static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1592{
1593 struct ip_tunnel *t = netdev_priv(dev);
1594 struct ip_tunnel_parm *p = &t->parms;
1595
1596 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1597 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1598 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1599 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1600 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1601 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1602 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1603 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1604 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1605 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1606
1607 return 0;
1608
1609nla_put_failure:
1610 return -EMSGSIZE;
1611}
1612
1613static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1614 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1615 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1616 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1617 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1618 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1619 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1620 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1621 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1622 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1623 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1624};
1625
1626static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1627 .kind = "gre",
1628 .maxtype = IFLA_GRE_MAX,
1629 .policy = ipgre_policy,
1630 .priv_size = sizeof(struct ip_tunnel),
1631 .setup = ipgre_tunnel_setup,
1632 .validate = ipgre_tunnel_validate,
1633 .newlink = ipgre_newlink,
1634 .changelink = ipgre_changelink,
1635 .get_size = ipgre_get_size,
1636 .fill_info = ipgre_fill_info,
1637};
1638
e1a80002
HX
1639static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1640 .kind = "gretap",
1641 .maxtype = IFLA_GRE_MAX,
1642 .policy = ipgre_policy,
1643 .priv_size = sizeof(struct ip_tunnel),
1644 .setup = ipgre_tap_setup,
1645 .validate = ipgre_tap_validate,
1646 .newlink = ipgre_newlink,
1647 .changelink = ipgre_changelink,
1648 .get_size = ipgre_get_size,
1649 .fill_info = ipgre_fill_info,
1650};
1651
1da177e4
LT
1652/*
1653 * And now the modules code and kernel interface.
1654 */
1655
1656static int __init ipgre_init(void)
1657{
1658 int err;
1659
1660 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1661
cfb8fbf2 1662 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1663 if (err < 0)
c2892f02
AD
1664 return err;
1665
1666 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1667 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed;
1670 }
7daa0004 1671
c19e654d
HX
1672 err = rtnl_link_register(&ipgre_link_ops);
1673 if (err < 0)
1674 goto rtnl_link_failed;
1675
e1a80002
HX
1676 err = rtnl_link_register(&ipgre_tap_ops);
1677 if (err < 0)
1678 goto tap_ops_failed;
1679
c19e654d 1680out:
1da177e4 1681 return err;
c19e654d 1682
e1a80002
HX
1683tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1685rtnl_link_failed:
c19e654d 1686 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
c2892f02
AD
1687add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops);
c19e654d 1689 goto out;
1da177e4
LT
1690}
1691
db44575f 1692static void __exit ipgre_fini(void)
1da177e4 1693{
e1a80002 1694 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1695 rtnl_link_unregister(&ipgre_link_ops);
1da177e4
LT
1696 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1697 printk(KERN_INFO "ipgre close: can't remove protocol\n");
c2892f02 1698 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1699}
1700
1701module_init(ipgre_init);
1702module_exit(ipgre_fini);
1703MODULE_LICENSE("GPL");
4d74f8ba
PM
1704MODULE_ALIAS_RTNL_LINK("gre");
1705MODULE_ALIAS_RTNL_LINK("gretap");