]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
net: Simplify phonet pernet operations.
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
a43912ab 69 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
70
71
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would be even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all.
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
107
108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
115
116 Alexey Kuznetsov.
117 */
118
c19e654d 119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 122static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
123
124/* Fallback tunnel: no source, no destination, no key, no options */
125
eb8ce741
PE
126#define HASH_SIZE 16
127
f99189b1 128static int ipgre_net_id __read_mostly;
59a4c759 129struct ipgre_net {
eb8ce741
PE
130 struct ip_tunnel *tunnels[4][HASH_SIZE];
131
7daa0004 132 struct net_device *fb_tunnel_dev;
59a4c759
PE
133};
134
1da177e4
LT
135/* Tunnel hash table */
136
137/*
138 4 hash tables:
139
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
144
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
148
149 All keyless packets, if not matched by configured keyless tunnels
150 will match fallback tunnel.
151 */
152
/*
 * Fold an address or key into a 4-bit bucket index (0..15).
 * The argument is parenthesized so that an expression argument such as
 * HASH(a ^ b) is cast and shifted as a whole; it is still evaluated twice,
 * so it must not have side effects.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)
1da177e4 154
eb8ce741
PE
/* Named aliases for the four hash tables inside struct ipgre_net. */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]

/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

/*
 * Walk one hash chain using rcu_dereference().  The iterator is a variable
 * named "t" that must already be declared in the enclosing scope.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1da177e4
LT
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
749c10f9 169static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
1da177e4 172{
749c10f9
TT
173 struct net *net = dev_net(dev);
174 int link = dev->ifindex;
1da177e4
LT
175 unsigned h0 = HASH(remote);
176 unsigned h1 = HASH(key);
afcf1242 177 struct ip_tunnel *t, *cand = NULL;
7daa0004 178 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
179 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
180 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 181 int score, cand_score = 4;
1da177e4 182
8d5b2c08 183 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
184 if (local != t->parms.iph.saddr ||
185 remote != t->parms.iph.daddr ||
186 key != t->parms.i_key ||
187 !(t->dev->flags & IFF_UP))
188 continue;
189
190 if (t->dev->type != ARPHRD_IPGRE &&
191 t->dev->type != dev_type)
192 continue;
193
afcf1242 194 score = 0;
749c10f9 195 if (t->parms.link != link)
afcf1242 196 score |= 1;
749c10f9 197 if (t->dev->type != dev_type)
afcf1242
TT
198 score |= 2;
199 if (score == 0)
749c10f9 200 return t;
afcf1242
TT
201
202 if (score < cand_score) {
203 cand = t;
204 cand_score = score;
205 }
1da177e4 206 }
e1a80002 207
8d5b2c08 208 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9
TT
209 if (remote != t->parms.iph.daddr ||
210 key != t->parms.i_key ||
211 !(t->dev->flags & IFF_UP))
212 continue;
213
214 if (t->dev->type != ARPHRD_IPGRE &&
215 t->dev->type != dev_type)
216 continue;
217
afcf1242 218 score = 0;
749c10f9 219 if (t->parms.link != link)
afcf1242 220 score |= 1;
749c10f9 221 if (t->dev->type != dev_type)
afcf1242
TT
222 score |= 2;
223 if (score == 0)
749c10f9 224 return t;
afcf1242
TT
225
226 if (score < cand_score) {
227 cand = t;
228 cand_score = score;
229 }
1da177e4 230 }
e1a80002 231
8d5b2c08 232 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
233 if ((local != t->parms.iph.saddr &&
234 (local != t->parms.iph.daddr ||
235 !ipv4_is_multicast(local))) ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
238 continue;
239
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
242 continue;
243
afcf1242 244 score = 0;
749c10f9 245 if (t->parms.link != link)
afcf1242 246 score |= 1;
749c10f9 247 if (t->dev->type != dev_type)
afcf1242
TT
248 score |= 2;
249 if (score == 0)
749c10f9 250 return t;
afcf1242
TT
251
252 if (score < cand_score) {
253 cand = t;
254 cand_score = score;
255 }
1da177e4 256 }
e1a80002 257
8d5b2c08 258 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
259 if (t->parms.i_key != key ||
260 !(t->dev->flags & IFF_UP))
261 continue;
262
263 if (t->dev->type != ARPHRD_IPGRE &&
264 t->dev->type != dev_type)
265 continue;
266
afcf1242 267 score = 0;
749c10f9 268 if (t->parms.link != link)
afcf1242 269 score |= 1;
749c10f9 270 if (t->dev->type != dev_type)
afcf1242
TT
271 score |= 2;
272 if (score == 0)
749c10f9 273 return t;
afcf1242
TT
274
275 if (score < cand_score) {
276 cand = t;
277 cand_score = score;
278 }
1da177e4
LT
279 }
280
afcf1242
TT
281 if (cand != NULL)
282 return cand;
e1a80002 283
8d5b2c08
ED
284 dev = ign->fb_tunnel_dev;
285 if (dev->flags & IFF_UP)
286 return netdev_priv(dev);
749c10f9 287
1da177e4
LT
288 return NULL;
289}
290
f57e7d5a
PE
291static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
292 struct ip_tunnel_parm *parms)
1da177e4 293{
5056a1ef
YH
294 __be32 remote = parms->iph.daddr;
295 __be32 local = parms->iph.saddr;
296 __be32 key = parms->i_key;
1da177e4
LT
297 unsigned h = HASH(key);
298 int prio = 0;
299
300 if (local)
301 prio |= 1;
f97c1e0c 302 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
303 prio |= 2;
304 h ^= HASH(remote);
305 }
306
eb8ce741 307 return &ign->tunnels[prio][h];
1da177e4
LT
308}
309
f57e7d5a
PE
310static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
311 struct ip_tunnel *t)
5056a1ef 312{
f57e7d5a 313 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
314}
315
f57e7d5a 316static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 317{
f57e7d5a 318 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4 319
8d5b2c08 320 spin_lock_bh(&ipgre_lock);
1da177e4 321 t->next = *tp;
8d5b2c08
ED
322 rcu_assign_pointer(*tp, t);
323 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
324}
325
f57e7d5a 326static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
327{
328 struct ip_tunnel **tp;
329
f57e7d5a 330 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4 331 if (t == *tp) {
8d5b2c08 332 spin_lock_bh(&ipgre_lock);
1da177e4 333 *tp = t->next;
8d5b2c08 334 spin_unlock_bh(&ipgre_lock);
1da177e4
LT
335 break;
336 }
337 }
338}
339
e1a80002
HX
340static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341 struct ip_tunnel_parm *parms,
342 int type)
1da177e4 343{
d5a0a1e3
AV
344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
749c10f9 347 int link = parms->link;
e1a80002
HX
348 struct ip_tunnel *t, **tp;
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350
351 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
352 if (local == t->parms.iph.saddr &&
353 remote == t->parms.iph.daddr &&
354 key == t->parms.i_key &&
749c10f9 355 link == t->parms.link &&
e1a80002
HX
356 type == t->dev->type)
357 break;
358
359 return t;
360}
361
362static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
363 struct ip_tunnel_parm *parms, int create)
364{
365 struct ip_tunnel *t, *nt;
1da177e4 366 struct net_device *dev;
1da177e4 367 char name[IFNAMSIZ];
f57e7d5a 368 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 369
e1a80002
HX
370 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
371 if (t || !create)
372 return t;
1da177e4
LT
373
374 if (parms->name[0])
375 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
376 else
377 sprintf(name, "gre%%d");
1da177e4
LT
378
379 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
380 if (!dev)
381 return NULL;
382
0b67eceb
PE
383 dev_net_set(dev, net);
384
b37d428b
PE
385 if (strchr(name, '%')) {
386 if (dev_alloc_name(dev, name) < 0)
387 goto failed_free;
388 }
389
2941a486 390 nt = netdev_priv(dev);
1da177e4 391 nt->parms = *parms;
c19e654d 392 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 393
42aa9162
HX
394 dev->mtu = ipgre_tunnel_bind_dev(dev);
395
b37d428b
PE
396 if (register_netdevice(dev) < 0)
397 goto failed_free;
1da177e4 398
1da177e4 399 dev_hold(dev);
f57e7d5a 400 ipgre_tunnel_link(ign, nt);
1da177e4
LT
401 return nt;
402
b37d428b
PE
403failed_free:
404 free_netdev(dev);
1da177e4
LT
405 return NULL;
406}
407
408static void ipgre_tunnel_uninit(struct net_device *dev)
409{
f57e7d5a
PE
410 struct net *net = dev_net(dev);
411 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
412
413 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
414 dev_put(dev);
415}
416
417
418static void ipgre_err(struct sk_buff *skb, u32 info)
419{
1da177e4 420
071f92d0 421/* All the routers (except for Linux) return only
1da177e4
LT
422 8 bytes of packet payload. It means, that precise relaying of
423 ICMP in the real Internet is absolutely infeasible.
424
425 Moreover, Cisco "wise men" put GRE key to the third word
426 in GRE header. It makes impossible maintaining even soft state for keyed
427 GRE tunnels with enabled checksum. Tell them "thank you".
428
429 Well, I wonder, rfc1812 was written by Cisco employee,
430 what the hell these idiots break standrads established
431 by themself???
432 */
433
6ed2533e 434 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 435 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 436 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
437 const int type = icmp_hdr(skb)->type;
438 const int code = icmp_hdr(skb)->code;
1da177e4 439 struct ip_tunnel *t;
d5a0a1e3 440 __be16 flags;
1da177e4
LT
441
442 flags = p[0];
443 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
444 if (flags&(GRE_VERSION|GRE_ROUTING))
445 return;
446 if (flags&GRE_KEY) {
447 grehlen += 4;
448 if (flags&GRE_CSUM)
449 grehlen += 4;
450 }
451 }
452
453 /* If only 8 bytes returned, keyed message will be dropped here */
454 if (skb_headlen(skb) < grehlen)
455 return;
456
457 switch (type) {
458 default:
459 case ICMP_PARAMETERPROB:
460 return;
461
462 case ICMP_DEST_UNREACH:
463 switch (code) {
464 case ICMP_SR_FAILED:
465 case ICMP_PORT_UNREACH:
466 /* Impossible event. */
467 return;
468 case ICMP_FRAG_NEEDED:
469 /* Soft state for pmtu is maintained by IP core. */
470 return;
471 default:
472 /* All others are translated to HOST_UNREACH.
473 rfc2003 contains "deep thoughts" about NET_UNREACH,
474 I believe they are just ether pollution. --ANK
475 */
476 break;
477 }
478 break;
479 case ICMP_TIME_EXCEEDED:
480 if (code != ICMP_EXC_TTL)
481 return;
482 break;
483 }
484
8d5b2c08 485 rcu_read_lock();
749c10f9 486 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
487 flags & GRE_KEY ?
488 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
489 p[1]);
f97c1e0c
JP
490 if (t == NULL || t->parms.iph.daddr == 0 ||
491 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
492 goto out;
493
494 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
495 goto out;
496
da6185d8 497 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
498 t->err_count++;
499 else
500 t->err_count = 1;
501 t->err_time = jiffies;
502out:
8d5b2c08 503 rcu_read_unlock();
1da177e4 504 return;
1da177e4
LT
505}
506
507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
508{
509 if (INET_ECN_is_ce(iph->tos)) {
510 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 511 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 512 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 513 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
514 }
515 }
516}
517
518static inline u8
519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
520{
521 u8 inner = 0;
522 if (skb->protocol == htons(ETH_P_IP))
523 inner = old_iph->tos;
524 else if (skb->protocol == htons(ETH_P_IPV6))
525 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
526 return INET_ECN_encapsulate(tos, inner);
527}
528
529static int ipgre_rcv(struct sk_buff *skb)
530{
531 struct iphdr *iph;
532 u8 *h;
d5a0a1e3 533 __be16 flags;
d3bc23e7 534 __sum16 csum = 0;
d5a0a1e3 535 __be32 key = 0;
1da177e4
LT
536 u32 seqno = 0;
537 struct ip_tunnel *tunnel;
538 int offset = 4;
e1a80002 539 __be16 gre_proto;
64194c31 540 unsigned int len;
1da177e4
LT
541
542 if (!pskb_may_pull(skb, 16))
543 goto drop_nolock;
544
eddc9ec5 545 iph = ip_hdr(skb);
1da177e4 546 h = skb->data;
d5a0a1e3 547 flags = *(__be16*)h;
1da177e4
LT
548
549 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
550 /* - Version must be 0.
551 - We do not support routing headers.
552 */
553 if (flags&(GRE_VERSION|GRE_ROUTING))
554 goto drop_nolock;
555
556 if (flags&GRE_CSUM) {
fb286bb2 557 switch (skb->ip_summed) {
84fa7933 558 case CHECKSUM_COMPLETE:
d3bc23e7 559 csum = csum_fold(skb->csum);
fb286bb2
HX
560 if (!csum)
561 break;
562 /* fall through */
563 case CHECKSUM_NONE:
564 skb->csum = 0;
565 csum = __skb_checksum_complete(skb);
84fa7933 566 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
567 }
568 offset += 4;
569 }
570 if (flags&GRE_KEY) {
d5a0a1e3 571 key = *(__be32*)(h + offset);
1da177e4
LT
572 offset += 4;
573 }
574 if (flags&GRE_SEQ) {
d5a0a1e3 575 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
576 offset += 4;
577 }
578 }
579
e1a80002
HX
580 gre_proto = *(__be16 *)(h + 2);
581
8d5b2c08 582 rcu_read_lock();
749c10f9 583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
584 iph->saddr, iph->daddr, key,
585 gre_proto))) {
addd68eb
PE
586 struct net_device_stats *stats = &tunnel->dev->stats;
587
1da177e4
LT
588 secpath_reset(skb);
589
e1a80002 590 skb->protocol = gre_proto;
1da177e4
LT
591 /* WCCP version 1 and 2 protocol decoding.
592 * - Change protocol to IP
593 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
594 */
e1a80002 595 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 596 skb->protocol = htons(ETH_P_IP);
e905a9ed 597 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
598 offset += 4;
599 }
600
1d069167 601 skb->mac_header = skb->network_header;
4209fb60 602 __pskb_pull(skb, offset);
9c70220b 603 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
604 skb->pkt_type = PACKET_HOST;
605#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 606 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 607 /* Looped back packet, drop it! */
511c3f92 608 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 609 goto drop;
addd68eb 610 stats->multicast++;
1da177e4
LT
611 skb->pkt_type = PACKET_BROADCAST;
612 }
613#endif
614
615 if (((flags&GRE_CSUM) && csum) ||
616 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
617 stats->rx_crc_errors++;
618 stats->rx_errors++;
1da177e4
LT
619 goto drop;
620 }
621 if (tunnel->parms.i_flags&GRE_SEQ) {
622 if (!(flags&GRE_SEQ) ||
623 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
624 stats->rx_fifo_errors++;
625 stats->rx_errors++;
1da177e4
LT
626 goto drop;
627 }
628 tunnel->i_seqno = seqno + 1;
629 }
e1a80002 630
64194c31
HX
631 len = skb->len;
632
e1a80002
HX
633 /* Warning: All skb pointers will be invalidated! */
634 if (tunnel->dev->type == ARPHRD_ETHER) {
635 if (!pskb_may_pull(skb, ETH_HLEN)) {
636 stats->rx_length_errors++;
637 stats->rx_errors++;
638 goto drop;
639 }
640
641 iph = ip_hdr(skb);
642 skb->protocol = eth_type_trans(skb, tunnel->dev);
643 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
644 }
645
addd68eb 646 stats->rx_packets++;
64194c31 647 stats->rx_bytes += len;
1da177e4 648 skb->dev = tunnel->dev;
adf30907 649 skb_dst_drop(skb);
1da177e4 650 nf_reset(skb);
e1a80002
HX
651
652 skb_reset_network_header(skb);
1da177e4 653 ipgre_ecn_decapsulate(iph, skb);
e1a80002 654
1da177e4 655 netif_rx(skb);
8d5b2c08 656 rcu_read_unlock();
1da177e4
LT
657 return(0);
658 }
45af08be 659 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
660
661drop:
8d5b2c08 662 rcu_read_unlock();
1da177e4
LT
663drop_nolock:
664 kfree_skb(skb);
665 return(0);
666}
667
6fef4c0c 668static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 669{
2941a486 670 struct ip_tunnel *tunnel = netdev_priv(dev);
0bfbedb1
ED
671 struct net_device_stats *stats = &dev->stats;
672 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
eddc9ec5 673 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
674 struct iphdr *tiph;
675 u8 tos;
d5a0a1e3 676 __be16 df;
1da177e4
LT
677 struct rtable *rt; /* Route to the other host */
678 struct net_device *tdev; /* Device to other host */
679 struct iphdr *iph; /* Our new IP header */
c2636b4d 680 unsigned int max_headroom; /* The extra header space needed */
1da177e4 681 int gre_hlen;
d5a0a1e3 682 __be32 dst;
1da177e4
LT
683 int mtu;
684
e1a80002
HX
685 if (dev->type == ARPHRD_ETHER)
686 IPCB(skb)->flags = 0;
687
688 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 689 gre_hlen = 0;
6ed2533e 690 tiph = (struct iphdr *)skb->data;
1da177e4
LT
691 } else {
692 gre_hlen = tunnel->hlen;
693 tiph = &tunnel->parms.iph;
694 }
695
696 if ((dst = tiph->daddr) == 0) {
697 /* NBMA tunnel */
698
adf30907 699 if (skb_dst(skb) == NULL) {
addd68eb 700 stats->tx_fifo_errors++;
1da177e4
LT
701 goto tx_error;
702 }
703
704 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 705 rt = skb_rtable(skb);
1da177e4
LT
706 if ((dst = rt->rt_gateway) == 0)
707 goto tx_error_icmp;
708 }
709#ifdef CONFIG_IPV6
710 else if (skb->protocol == htons(ETH_P_IPV6)) {
711 struct in6_addr *addr6;
712 int addr_type;
adf30907 713 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
714
715 if (neigh == NULL)
716 goto tx_error;
717
6ed2533e 718 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
719 addr_type = ipv6_addr_type(addr6);
720
721 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 722 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
723 addr_type = ipv6_addr_type(addr6);
724 }
725
726 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
727 goto tx_error_icmp;
728
729 dst = addr6->s6_addr32[3];
730 }
731#endif
732 else
733 goto tx_error;
734 }
735
736 tos = tiph->tos;
ee686ca9
AJ
737 if (tos == 1) {
738 tos = 0;
1da177e4
LT
739 if (skb->protocol == htons(ETH_P_IP))
740 tos = old_iph->tos;
1da177e4
LT
741 }
742
743 {
744 struct flowi fl = { .oif = tunnel->parms.link,
745 .nl_u = { .ip4_u =
746 { .daddr = dst,
747 .saddr = tiph->saddr,
748 .tos = RT_TOS(tos) } },
749 .proto = IPPROTO_GRE };
96635522 750 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 751 stats->tx_carrier_errors++;
1da177e4
LT
752 goto tx_error;
753 }
754 }
755 tdev = rt->u.dst.dev;
756
757 if (tdev == dev) {
758 ip_rt_put(rt);
addd68eb 759 stats->collisions++;
1da177e4
LT
760 goto tx_error;
761 }
762
763 df = tiph->frag_off;
764 if (df)
c95b819a 765 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 766 else
adf30907 767 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 768
adf30907
ED
769 if (skb_dst(skb))
770 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
771
772 if (skb->protocol == htons(ETH_P_IP)) {
773 df |= (old_iph->frag_off&htons(IP_DF));
774
775 if ((old_iph->frag_off&htons(IP_DF)) &&
776 mtu < ntohs(old_iph->tot_len)) {
777 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
778 ip_rt_put(rt);
779 goto tx_error;
780 }
781 }
782#ifdef CONFIG_IPV6
783 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 784 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 785
adf30907 786 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
787 if ((tunnel->parms.iph.daddr &&
788 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
789 rt6->rt6i_dst.plen == 128) {
790 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 791 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
792 }
793 }
794
795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
797 ip_rt_put(rt);
798 goto tx_error;
799 }
800 }
801#endif
802
803 if (tunnel->err_count > 0) {
da6185d8
WY
804 if (time_before(jiffies,
805 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
806 tunnel->err_count--;
807
808 dst_link_failure(skb);
809 } else
810 tunnel->err_count = 0;
811 }
812
813 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
814
cfbba49d
PM
815 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
818 if (!new_skb) {
819 ip_rt_put(rt);
0bfbedb1 820 txq->tx_dropped++;
1da177e4 821 dev_kfree_skb(skb);
6ed10654 822 return NETDEV_TX_OK;
1da177e4
LT
823 }
824 if (skb->sk)
825 skb_set_owner_w(new_skb, skb->sk);
826 dev_kfree_skb(skb);
827 skb = new_skb;
eddc9ec5 828 old_iph = ip_hdr(skb);
1da177e4
LT
829 }
830
64194c31 831 skb_reset_transport_header(skb);
e2d1bca7
ACM
832 skb_push(skb, gre_hlen);
833 skb_reset_network_header(skb);
1da177e4 834 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
835 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
836 IPSKB_REROUTED);
adf30907
ED
837 skb_dst_drop(skb);
838 skb_dst_set(skb, &rt->u.dst);
1da177e4
LT
839
840 /*
841 * Push down and install the IPIP header.
842 */
843
eddc9ec5 844 iph = ip_hdr(skb);
1da177e4
LT
845 iph->version = 4;
846 iph->ihl = sizeof(struct iphdr) >> 2;
847 iph->frag_off = df;
848 iph->protocol = IPPROTO_GRE;
849 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
850 iph->daddr = rt->rt_dst;
851 iph->saddr = rt->rt_src;
852
853 if ((iph->ttl = tiph->ttl) == 0) {
854 if (skb->protocol == htons(ETH_P_IP))
855 iph->ttl = old_iph->ttl;
856#ifdef CONFIG_IPV6
857 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 858 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
859#endif
860 else
861 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
862 }
863
e1a80002
HX
864 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
865 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
866 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
867
868 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 869 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
870
871 if (tunnel->parms.o_flags&GRE_SEQ) {
872 ++tunnel->o_seqno;
873 *ptr = htonl(tunnel->o_seqno);
874 ptr--;
875 }
876 if (tunnel->parms.o_flags&GRE_KEY) {
877 *ptr = tunnel->parms.o_key;
878 ptr--;
879 }
880 if (tunnel->parms.o_flags&GRE_CSUM) {
881 *ptr = 0;
5f92a738 882 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
883 }
884 }
885
886 nf_reset(skb);
887
888 IPTUNNEL_XMIT();
6ed10654 889 return NETDEV_TX_OK;
1da177e4
LT
890
891tx_error_icmp:
892 dst_link_failure(skb);
893
894tx_error:
895 stats->tx_errors++;
896 dev_kfree_skb(skb);
6ed10654 897 return NETDEV_TX_OK;
1da177e4
LT
898}
899
42aa9162 900static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
901{
902 struct net_device *tdev = NULL;
903 struct ip_tunnel *tunnel;
904 struct iphdr *iph;
905 int hlen = LL_MAX_HEADER;
906 int mtu = ETH_DATA_LEN;
907 int addend = sizeof(struct iphdr) + 4;
908
909 tunnel = netdev_priv(dev);
910 iph = &tunnel->parms.iph;
911
c95b819a 912 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
913
914 if (iph->daddr) {
915 struct flowi fl = { .oif = tunnel->parms.link,
916 .nl_u = { .ip4_u =
917 { .daddr = iph->daddr,
918 .saddr = iph->saddr,
919 .tos = RT_TOS(iph->tos) } },
920 .proto = IPPROTO_GRE };
921 struct rtable *rt;
96635522 922 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
923 tdev = rt->u.dst.dev;
924 ip_rt_put(rt);
925 }
e1a80002
HX
926
927 if (dev->type != ARPHRD_ETHER)
928 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
929 }
930
931 if (!tdev && tunnel->parms.link)
96635522 932 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
933
934 if (tdev) {
c95b819a 935 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
936 mtu = tdev->mtu;
937 }
938 dev->iflink = tunnel->parms.link;
939
940 /* Precalculate GRE options length */
941 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
942 if (tunnel->parms.o_flags&GRE_CSUM)
943 addend += 4;
944 if (tunnel->parms.o_flags&GRE_KEY)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_SEQ)
947 addend += 4;
948 }
c95b819a 949 dev->needed_headroom = addend + hlen;
8cdb0456 950 mtu -= dev->hard_header_len + addend;
42aa9162
HX
951
952 if (mtu < 68)
953 mtu = 68;
954
ee34c1eb
MS
955 tunnel->hlen = addend;
956
42aa9162 957 return mtu;
ee34c1eb
MS
958}
959
1da177e4
LT
960static int
961ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
962{
963 int err = 0;
964 struct ip_tunnel_parm p;
965 struct ip_tunnel *t;
f57e7d5a
PE
966 struct net *net = dev_net(dev);
967 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
968
969 switch (cmd) {
970 case SIOCGETTUNNEL:
971 t = NULL;
7daa0004 972 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
973 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
974 err = -EFAULT;
975 break;
976 }
f57e7d5a 977 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
978 }
979 if (t == NULL)
2941a486 980 t = netdev_priv(dev);
1da177e4
LT
981 memcpy(&p, &t->parms, sizeof(p));
982 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
983 err = -EFAULT;
984 break;
985
986 case SIOCADDTUNNEL:
987 case SIOCCHGTUNNEL:
988 err = -EPERM;
989 if (!capable(CAP_NET_ADMIN))
990 goto done;
991
992 err = -EFAULT;
993 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
994 goto done;
995
996 err = -EINVAL;
997 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
998 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
999 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1000 goto done;
1001 if (p.iph.ttl)
1002 p.iph.frag_off |= htons(IP_DF);
1003
1004 if (!(p.i_flags&GRE_KEY))
1005 p.i_key = 0;
1006 if (!(p.o_flags&GRE_KEY))
1007 p.o_key = 0;
1008
f57e7d5a 1009 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1010
7daa0004 1011 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1012 if (t != NULL) {
1013 if (t->dev != dev) {
1014 err = -EEXIST;
1015 break;
1016 }
1017 } else {
6ed2533e 1018 unsigned nflags = 0;
1da177e4 1019
2941a486 1020 t = netdev_priv(dev);
1da177e4 1021
f97c1e0c 1022 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1023 nflags = IFF_BROADCAST;
1024 else if (p.iph.daddr)
1025 nflags = IFF_POINTOPOINT;
1026
1027 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1028 err = -EINVAL;
1029 break;
1030 }
f57e7d5a 1031 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1032 t->parms.iph.saddr = p.iph.saddr;
1033 t->parms.iph.daddr = p.iph.daddr;
1034 t->parms.i_key = p.i_key;
1035 t->parms.o_key = p.o_key;
1036 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1037 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1038 ipgre_tunnel_link(ign, t);
1da177e4
LT
1039 netdev_state_change(dev);
1040 }
1041 }
1042
1043 if (t) {
1044 err = 0;
1045 if (cmd == SIOCCHGTUNNEL) {
1046 t->parms.iph.ttl = p.iph.ttl;
1047 t->parms.iph.tos = p.iph.tos;
1048 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1049 if (t->parms.link != p.link) {
1050 t->parms.link = p.link;
42aa9162 1051 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1052 netdev_state_change(dev);
1053 }
1da177e4
LT
1054 }
1055 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1056 err = -EFAULT;
1057 } else
1058 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1059 break;
1060
1061 case SIOCDELTUNNEL:
1062 err = -EPERM;
1063 if (!capable(CAP_NET_ADMIN))
1064 goto done;
1065
7daa0004 1066 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1067 err = -EFAULT;
1068 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1069 goto done;
1070 err = -ENOENT;
f57e7d5a 1071 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1072 goto done;
1073 err = -EPERM;
7daa0004 1074 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1075 goto done;
1076 dev = t->dev;
1077 }
22f8cde5
SH
1078 unregister_netdevice(dev);
1079 err = 0;
1da177e4
LT
1080 break;
1081
1082 default:
1083 err = -EINVAL;
1084 }
1085
1086done:
1087 return err;
1088}
1089
1da177e4
LT
1090static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1091{
2941a486 1092 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1093 if (new_mtu < 68 ||
1094 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1095 return -EINVAL;
1096 dev->mtu = new_mtu;
1097 return 0;
1098}
1099
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1128
3b04ddde
SH
1129static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1130 unsigned short type,
1131 const void *daddr, const void *saddr, unsigned len)
1da177e4 1132{
2941a486 1133 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1134 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1135 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1136
1137 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1138 p[0] = t->parms.o_flags;
1139 p[1] = htons(type);
1140
1141 /*
e905a9ed 1142 * Set the source hardware address.
1da177e4 1143 */
e905a9ed 1144
1da177e4
LT
1145 if (saddr)
1146 memcpy(&iph->saddr, saddr, 4);
1147
1148 if (daddr) {
1149 memcpy(&iph->daddr, daddr, 4);
1150 return t->hlen;
1151 }
f97c1e0c 1152 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1153 return t->hlen;
e905a9ed 1154
1da177e4
LT
1155 return -t->hlen;
1156}
1157
6a5f44d7
TT
1158static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1159{
6ed2533e 1160 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1161 memcpy(haddr, &iph->saddr, 4);
1162 return 4;
1163}
1164
3b04ddde
SH
1165static const struct header_ops ipgre_header_ops = {
1166 .create = ipgre_header,
6a5f44d7 1167 .parse = ipgre_header_parse,
3b04ddde
SH
1168};
1169
6a5f44d7 1170#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1171static int ipgre_open(struct net_device *dev)
1172{
2941a486 1173 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1174
f97c1e0c 1175 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1176 struct flowi fl = { .oif = t->parms.link,
1177 .nl_u = { .ip4_u =
1178 { .daddr = t->parms.iph.daddr,
1179 .saddr = t->parms.iph.saddr,
1180 .tos = RT_TOS(t->parms.iph.tos) } },
1181 .proto = IPPROTO_GRE };
1182 struct rtable *rt;
96635522 1183 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1184 return -EADDRNOTAVAIL;
1185 dev = rt->u.dst.dev;
1186 ip_rt_put(rt);
e5ed6399 1187 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1188 return -EADDRNOTAVAIL;
1189 t->mlink = dev->ifindex;
e5ed6399 1190 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1191 }
1192 return 0;
1193}
1194
1195static int ipgre_close(struct net_device *dev)
1196{
2941a486 1197 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1198
f97c1e0c 1199 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1200 struct in_device *in_dev;
c346dca1 1201 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1202 if (in_dev) {
1203 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1204 in_dev_put(in_dev);
1205 }
1206 }
1207 return 0;
1208}
1209
1210#endif
1211
b8c26a33
SH
1212static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_init = ipgre_tunnel_init,
1214 .ndo_uninit = ipgre_tunnel_uninit,
1215#ifdef CONFIG_NET_IPGRE_BROADCAST
1216 .ndo_open = ipgre_open,
1217 .ndo_stop = ipgre_close,
1218#endif
1219 .ndo_start_xmit = ipgre_tunnel_xmit,
1220 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1221 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1222};
1223
1da177e4
LT
1224static void ipgre_tunnel_setup(struct net_device *dev)
1225{
b8c26a33 1226 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1227 dev->destructor = free_netdev;
1da177e4
LT
1228
1229 dev->type = ARPHRD_IPGRE;
c95b819a 1230 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1231 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1232 dev->flags = IFF_NOARP;
1233 dev->iflink = 0;
1234 dev->addr_len = 4;
0b67eceb 1235 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1236 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1237}
1238
1239static int ipgre_tunnel_init(struct net_device *dev)
1240{
1da177e4
LT
1241 struct ip_tunnel *tunnel;
1242 struct iphdr *iph;
1da177e4 1243
2941a486 1244 tunnel = netdev_priv(dev);
1da177e4
LT
1245 iph = &tunnel->parms.iph;
1246
1247 tunnel->dev = dev;
1248 strcpy(tunnel->parms.name, dev->name);
1249
1250 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1251 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1252
1da177e4 1253 if (iph->daddr) {
1da177e4 1254#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1255 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1256 if (!iph->saddr)
1257 return -EINVAL;
1258 dev->flags = IFF_BROADCAST;
3b04ddde 1259 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1260 }
1261#endif
ee34c1eb 1262 } else
6a5f44d7 1263 dev->header_ops = &ipgre_header_ops;
1da177e4 1264
1da177e4
LT
1265 return 0;
1266}
1267
b8c26a33 1268static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1269{
2941a486 1270 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1271 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1272 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1273
1274 tunnel->dev = dev;
1275 strcpy(tunnel->parms.name, dev->name);
1276
1277 iph->version = 4;
1278 iph->protocol = IPPROTO_GRE;
1279 iph->ihl = 5;
1280 tunnel->hlen = sizeof(struct iphdr) + 4;
1281
1282 dev_hold(dev);
eb8ce741 1283 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1284}
1285
1286
32613090 1287static const struct net_protocol ipgre_protocol = {
1da177e4
LT
1288 .handler = ipgre_rcv,
1289 .err_handler = ipgre_err,
f96c148f 1290 .netns_ok = 1,
1da177e4
LT
1291};
1292
eef6dd65 1293static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1294{
1295 int prio;
1296
1297 for (prio = 0; prio < 4; prio++) {
1298 int h;
1299 for (h = 0; h < HASH_SIZE; h++) {
eef6dd65
ED
1300 struct ip_tunnel *t = ign->tunnels[prio][h];
1301
1302 while (t != NULL) {
1303 unregister_netdevice_queue(t->dev, head);
1304 t = t->next;
1305 }
eb8ce741
PE
1306 }
1307 }
1308}
1309
59a4c759
PE
1310static int ipgre_init_net(struct net *net)
1311{
1312 int err;
1313 struct ipgre_net *ign;
1314
1315 err = -ENOMEM;
eb8ce741 1316 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1317 if (ign == NULL)
1318 goto err_alloc;
1319
1320 err = net_assign_generic(net, ipgre_net_id, ign);
1321 if (err < 0)
1322 goto err_assign;
1323
7daa0004
PE
1324 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1325 ipgre_tunnel_setup);
1326 if (!ign->fb_tunnel_dev) {
1327 err = -ENOMEM;
1328 goto err_alloc_dev;
1329 }
be77e593 1330 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1331
b8c26a33 1332 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1333 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1334
1335 if ((err = register_netdev(ign->fb_tunnel_dev)))
1336 goto err_reg_dev;
1337
59a4c759
PE
1338 return 0;
1339
7daa0004
PE
1340err_reg_dev:
1341 free_netdev(ign->fb_tunnel_dev);
1342err_alloc_dev:
1343 /* nothing */
59a4c759
PE
1344err_assign:
1345 kfree(ign);
1346err_alloc:
1347 return err;
1348}
1349
1350static void ipgre_exit_net(struct net *net)
1351{
1352 struct ipgre_net *ign;
eef6dd65 1353 LIST_HEAD(list);
59a4c759
PE
1354
1355 ign = net_generic(net, ipgre_net_id);
7daa0004 1356 rtnl_lock();
eef6dd65
ED
1357 ipgre_destroy_tunnels(ign, &list);
1358 unregister_netdevice_many(&list);
7daa0004 1359 rtnl_unlock();
59a4c759
PE
1360 kfree(ign);
1361}
1362
1363static struct pernet_operations ipgre_net_ops = {
1364 .init = ipgre_init_net,
1365 .exit = ipgre_exit_net,
1366};
1da177e4 1367
c19e654d
HX
1368static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1369{
1370 __be16 flags;
1371
1372 if (!data)
1373 return 0;
1374
1375 flags = 0;
1376 if (data[IFLA_GRE_IFLAGS])
1377 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1378 if (data[IFLA_GRE_OFLAGS])
1379 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1380 if (flags & (GRE_VERSION|GRE_ROUTING))
1381 return -EINVAL;
1382
1383 return 0;
1384}
1385
e1a80002
HX
1386static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1387{
1388 __be32 daddr;
1389
1390 if (tb[IFLA_ADDRESS]) {
1391 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1392 return -EINVAL;
1393 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1394 return -EADDRNOTAVAIL;
1395 }
1396
1397 if (!data)
1398 goto out;
1399
1400 if (data[IFLA_GRE_REMOTE]) {
1401 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1402 if (!daddr)
1403 return -EINVAL;
1404 }
1405
1406out:
1407 return ipgre_tunnel_validate(tb, data);
1408}
1409
c19e654d
HX
1410static void ipgre_netlink_parms(struct nlattr *data[],
1411 struct ip_tunnel_parm *parms)
1412{
7bb82d92 1413 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1414
1415 parms->iph.protocol = IPPROTO_GRE;
1416
1417 if (!data)
1418 return;
1419
1420 if (data[IFLA_GRE_LINK])
1421 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1422
1423 if (data[IFLA_GRE_IFLAGS])
1424 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1425
1426 if (data[IFLA_GRE_OFLAGS])
1427 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1428
1429 if (data[IFLA_GRE_IKEY])
1430 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1431
1432 if (data[IFLA_GRE_OKEY])
1433 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1434
1435 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1436 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1437
1438 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1439 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1440
1441 if (data[IFLA_GRE_TTL])
1442 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1443
1444 if (data[IFLA_GRE_TOS])
1445 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1446
1447 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1448 parms->iph.frag_off = htons(IP_DF);
1449}
1450
e1a80002
HX
1451static int ipgre_tap_init(struct net_device *dev)
1452{
1453 struct ip_tunnel *tunnel;
1454
1455 tunnel = netdev_priv(dev);
1456
1457 tunnel->dev = dev;
1458 strcpy(tunnel->parms.name, dev->name);
1459
1460 ipgre_tunnel_bind_dev(dev);
1461
1462 return 0;
1463}
1464
b8c26a33
SH
1465static const struct net_device_ops ipgre_tap_netdev_ops = {
1466 .ndo_init = ipgre_tap_init,
1467 .ndo_uninit = ipgre_tunnel_uninit,
1468 .ndo_start_xmit = ipgre_tunnel_xmit,
1469 .ndo_set_mac_address = eth_mac_addr,
1470 .ndo_validate_addr = eth_validate_addr,
1471 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1472};
1473
e1a80002
HX
1474static void ipgre_tap_setup(struct net_device *dev)
1475{
1476
1477 ether_setup(dev);
1478
2e9526b3 1479 dev->netdev_ops = &ipgre_tap_netdev_ops;
e1a80002 1480 dev->destructor = free_netdev;
e1a80002
HX
1481
1482 dev->iflink = 0;
1483 dev->features |= NETIF_F_NETNS_LOCAL;
1484}
1485
81adee47 1486static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1487 struct nlattr *data[])
1488{
1489 struct ip_tunnel *nt;
1490 struct net *net = dev_net(dev);
1491 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1492 int mtu;
1493 int err;
1494
1495 nt = netdev_priv(dev);
1496 ipgre_netlink_parms(data, &nt->parms);
1497
e1a80002 1498 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1499 return -EEXIST;
1500
e1a80002
HX
1501 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1502 random_ether_addr(dev->dev_addr);
1503
c19e654d
HX
1504 mtu = ipgre_tunnel_bind_dev(dev);
1505 if (!tb[IFLA_MTU])
1506 dev->mtu = mtu;
1507
1508 err = register_netdevice(dev);
1509 if (err)
1510 goto out;
1511
1512 dev_hold(dev);
1513 ipgre_tunnel_link(ign, nt);
1514
1515out:
1516 return err;
1517}
1518
1519static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1520 struct nlattr *data[])
1521{
1522 struct ip_tunnel *t, *nt;
1523 struct net *net = dev_net(dev);
1524 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1525 struct ip_tunnel_parm p;
1526 int mtu;
1527
1528 if (dev == ign->fb_tunnel_dev)
1529 return -EINVAL;
1530
1531 nt = netdev_priv(dev);
1532 ipgre_netlink_parms(data, &p);
1533
1534 t = ipgre_tunnel_locate(net, &p, 0);
1535
1536 if (t) {
1537 if (t->dev != dev)
1538 return -EEXIST;
1539 } else {
c19e654d
HX
1540 t = nt;
1541
2e9526b3
HX
1542 if (dev->type != ARPHRD_ETHER) {
1543 unsigned nflags = 0;
c19e654d 1544
2e9526b3
HX
1545 if (ipv4_is_multicast(p.iph.daddr))
1546 nflags = IFF_BROADCAST;
1547 else if (p.iph.daddr)
1548 nflags = IFF_POINTOPOINT;
1549
1550 if ((dev->flags ^ nflags) &
1551 (IFF_POINTOPOINT | IFF_BROADCAST))
1552 return -EINVAL;
1553 }
c19e654d
HX
1554
1555 ipgre_tunnel_unlink(ign, t);
1556 t->parms.iph.saddr = p.iph.saddr;
1557 t->parms.iph.daddr = p.iph.daddr;
1558 t->parms.i_key = p.i_key;
2e9526b3
HX
1559 if (dev->type != ARPHRD_ETHER) {
1560 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1561 memcpy(dev->broadcast, &p.iph.daddr, 4);
1562 }
c19e654d
HX
1563 ipgre_tunnel_link(ign, t);
1564 netdev_state_change(dev);
1565 }
1566
1567 t->parms.o_key = p.o_key;
1568 t->parms.iph.ttl = p.iph.ttl;
1569 t->parms.iph.tos = p.iph.tos;
1570 t->parms.iph.frag_off = p.iph.frag_off;
1571
1572 if (t->parms.link != p.link) {
1573 t->parms.link = p.link;
1574 mtu = ipgre_tunnel_bind_dev(dev);
1575 if (!tb[IFLA_MTU])
1576 dev->mtu = mtu;
1577 netdev_state_change(dev);
1578 }
1579
1580 return 0;
1581}
1582
1583static size_t ipgre_get_size(const struct net_device *dev)
1584{
1585 return
1586 /* IFLA_GRE_LINK */
1587 nla_total_size(4) +
1588 /* IFLA_GRE_IFLAGS */
1589 nla_total_size(2) +
1590 /* IFLA_GRE_OFLAGS */
1591 nla_total_size(2) +
1592 /* IFLA_GRE_IKEY */
1593 nla_total_size(4) +
1594 /* IFLA_GRE_OKEY */
1595 nla_total_size(4) +
1596 /* IFLA_GRE_LOCAL */
1597 nla_total_size(4) +
1598 /* IFLA_GRE_REMOTE */
1599 nla_total_size(4) +
1600 /* IFLA_GRE_TTL */
1601 nla_total_size(1) +
1602 /* IFLA_GRE_TOS */
1603 nla_total_size(1) +
1604 /* IFLA_GRE_PMTUDISC */
1605 nla_total_size(1) +
1606 0;
1607}
1608
1609static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1610{
1611 struct ip_tunnel *t = netdev_priv(dev);
1612 struct ip_tunnel_parm *p = &t->parms;
1613
1614 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1615 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1616 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1617 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1618 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1619 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1620 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1621 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1622 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1623 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1624
1625 return 0;
1626
1627nla_put_failure:
1628 return -EMSGSIZE;
1629}
1630
1631static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1632 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1633 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1634 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1635 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1636 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1637 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1638 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1639 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1640 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1641 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1642};
1643
1644static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1645 .kind = "gre",
1646 .maxtype = IFLA_GRE_MAX,
1647 .policy = ipgre_policy,
1648 .priv_size = sizeof(struct ip_tunnel),
1649 .setup = ipgre_tunnel_setup,
1650 .validate = ipgre_tunnel_validate,
1651 .newlink = ipgre_newlink,
1652 .changelink = ipgre_changelink,
1653 .get_size = ipgre_get_size,
1654 .fill_info = ipgre_fill_info,
1655};
1656
e1a80002
HX
1657static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1658 .kind = "gretap",
1659 .maxtype = IFLA_GRE_MAX,
1660 .policy = ipgre_policy,
1661 .priv_size = sizeof(struct ip_tunnel),
1662 .setup = ipgre_tap_setup,
1663 .validate = ipgre_tap_validate,
1664 .newlink = ipgre_newlink,
1665 .changelink = ipgre_changelink,
1666 .get_size = ipgre_get_size,
1667 .fill_info = ipgre_fill_info,
1668};
1669
1da177e4
LT
1670/*
1671 * And now the modules code and kernel interface.
1672 */
1673
1674static int __init ipgre_init(void)
1675{
1676 int err;
1677
1678 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1679
1680 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1681 printk(KERN_INFO "ipgre init: can't add protocol\n");
1682 return -EAGAIN;
1683 }
1684
59a4c759
PE
1685 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1686 if (err < 0)
c19e654d 1687 goto gen_device_failed;
7daa0004 1688
c19e654d
HX
1689 err = rtnl_link_register(&ipgre_link_ops);
1690 if (err < 0)
1691 goto rtnl_link_failed;
1692
e1a80002
HX
1693 err = rtnl_link_register(&ipgre_tap_ops);
1694 if (err < 0)
1695 goto tap_ops_failed;
1696
c19e654d 1697out:
1da177e4 1698 return err;
c19e654d 1699
e1a80002
HX
1700tap_ops_failed:
1701 rtnl_link_unregister(&ipgre_link_ops);
c19e654d
HX
1702rtnl_link_failed:
1703 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1704gen_device_failed:
1705 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1706 goto out;
1da177e4
LT
1707}
1708
db44575f 1709static void __exit ipgre_fini(void)
1da177e4 1710{
e1a80002 1711 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d
HX
1712 rtnl_link_unregister(&ipgre_link_ops);
1713 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1714 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1715 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1da177e4
LT
1716}
1717
1718module_init(ipgre_init);
1719module_exit(ipgre_fini);
1720MODULE_LICENSE("GPL");
4d74f8ba
PM
1721MODULE_ALIAS_RTNL_LINK("gre");
1722MODULE_ALIAS_RTNL_LINK("gretap");