]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
bridge : Sanitize skb before it enters the IP stack
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4 16#include <linux/kernel.h>
5a0e3ad6 17#include <linux/slab.h>
1da177e4
LT
18#include <asm/uaccess.h>
19#include <linux/skbuff.h>
20#include <linux/netdevice.h>
21#include <linux/in.h>
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/if_arp.h>
25#include <linux/mroute.h>
26#include <linux/init.h>
27#include <linux/in6.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/netfilter_ipv4.h>
e1a80002 31#include <linux/etherdevice.h>
46f25dff 32#include <linux/if_ether.h>
1da177e4
LT
33
34#include <net/sock.h>
35#include <net/ip.h>
36#include <net/icmp.h>
37#include <net/protocol.h>
38#include <net/ipip.h>
39#include <net/arp.h>
40#include <net/checksum.h>
41#include <net/dsfield.h>
42#include <net/inet_ecn.h>
43#include <net/xfrm.h>
59a4c759
PE
44#include <net/net_namespace.h>
45#include <net/netns/generic.h>
c19e654d 46#include <net/rtnetlink.h>
00959ade 47#include <net/gre.h>
1da177e4
LT
48
49#ifdef CONFIG_IPV6
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#endif
54
55/*
56 Problems & solutions
57 --------------------
58
59 1. The most important issue is detecting local dead loops.
60 They would cause complete host lockup in transmit, which
61 would be "resolved" by stack overflow or, if queueing is enabled,
62 with infinite looping in net_bh.
63
64 We cannot track such dead loops during route installation,
65 it is infeasible task. The most general solutions would be
66 to keep skb->encapsulation counter (sort of local ttl),
67 and silently drop packet when it expires. It is the best
68 solution, but it supposes maintaining new variable in ALL
69 skb, even if no tunneling is used.
70
a43912ab 71 Current solution: HARD_TX_LOCK lock breaks dead loops.
1da177e4
LT
72
73
74
75 2. Networking dead loops would not kill routers, but would really
76 kill network. IP hop limit plays role of "t->recursion" in this case,
77 if we copy it from packet being encapsulated to upper header.
78 It is very good solution, but it introduces two problems:
79
80 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
81 do not work over tunnels.
82 - traceroute does not work. I planned to relay ICMP from tunnel,
83 so that this problem would be solved and traceroute output
84 would even more informative. This idea appeared to be wrong:
85 only Linux complies to rfc1812 now (yes, guys, Linux is the only
86 true router now :-)), all routers (at least, in neighbourhood of mine)
87 return only 8 bytes of payload. It is the end.
88
89 Hence, if we want that OSPF worked or traceroute said something reasonable,
90 we should search for another solution.
91
92 One of them is to parse packet trying to detect inner encapsulation
93 made by our node. It is difficult or even impossible, especially,
94 taking into account fragmentation. To be short, it is not a solution at all.
95
96 Current solution: The solution was UNEXPECTEDLY SIMPLE.
97 We force DF flag on tunnels with preconfigured hop limit,
98 that is ALL. :-) Well, it does not remove the problem completely,
99 but exponential growth of network traffic is changed to linear
100 (branches, that exceed pmtu are pruned) and tunnel mtu
101 fastly degrades to value <68, where looping stops.
102 Yes, it is not good if there exists a router in the loop,
103 which does not force DF, even when encapsulating packets have DF set.
104 But it is not our problem! Nobody could accuse us, we made
105 all that we could make. Even if it is your gated who injected
106 fatal route to network, even if it were you who configured
107 fatal static route: you are innocent. :-)
108
109
110
111 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
112 practically identical code. It would be good to glue them
113 together, but it is not very evident, how to make them modular.
114 sit is integral part of IPv6, ipip and gre are naturally modular.
115 We could extract common parts (hash table, ioctl etc)
116 to a separate module (ip_tunnel.c).
117
118 Alexey Kuznetsov.
119 */
120
c19e654d 121static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
122static int ipgre_tunnel_init(struct net_device *dev);
123static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 124static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
125
126/* Fallback tunnel: no source, no destination, no key, no options */
127
eb8ce741
PE
128#define HASH_SIZE 16
129
f99189b1 130static int ipgre_net_id __read_mostly;
59a4c759 131struct ipgre_net {
1507850b 132 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
eb8ce741 133
7daa0004 134 struct net_device *fb_tunnel_dev;
59a4c759
PE
135};
136
1da177e4
LT
137/* Tunnel hash table */
138
139/*
140 4 hash tables:
141
142 3: (remote,local)
143 2: (remote,*)
144 1: (*,local)
145 0: (*,*)
146
147 We require exact key match i.e. if a key is present in packet
148 it will match only tunnel with the same key; if it is not present,
149 it will match only keyless tunnel.
150
151 All keyless packets, if not matched configured keyless tunnels
152 will match fallback tunnel.
153 */
154
/*
 * Fold an address or key into a 4-bit bucket index (0..15).
 * The argument is parenthesized so that compound expressions passed by a
 * caller are cast and shifted as a whole.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)
1da177e4 156
eb8ce741
PE
157#define tunnels_r_l tunnels[3]
158#define tunnels_r tunnels[2]
159#define tunnels_l tunnels[1]
160#define tunnels_wc tunnels[0]
8d5b2c08 161/*
1507850b 162 * Locking : hash tables are protected by RCU and RTNL
8d5b2c08 163 */
1da177e4 164
8d5b2c08
ED
165#define for_each_ip_tunnel_rcu(start) \
166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1da177e4
LT
167
168/* Given src, dst and key, find appropriate for input tunnel. */
169
749c10f9 170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
171 __be32 remote, __be32 local,
172 __be32 key, __be16 gre_proto)
1da177e4 173{
749c10f9
TT
174 struct net *net = dev_net(dev);
175 int link = dev->ifindex;
1507850b
ED
176 unsigned int h0 = HASH(remote);
177 unsigned int h1 = HASH(key);
afcf1242 178 struct ip_tunnel *t, *cand = NULL;
7daa0004 179 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
181 ARPHRD_ETHER : ARPHRD_IPGRE;
afcf1242 182 int score, cand_score = 4;
1da177e4 183
8d5b2c08 184 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
749c10f9
TT
185 if (local != t->parms.iph.saddr ||
186 remote != t->parms.iph.daddr ||
187 key != t->parms.i_key ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (t->dev->type != ARPHRD_IPGRE &&
192 t->dev->type != dev_type)
193 continue;
194
afcf1242 195 score = 0;
749c10f9 196 if (t->parms.link != link)
afcf1242 197 score |= 1;
749c10f9 198 if (t->dev->type != dev_type)
afcf1242
TT
199 score |= 2;
200 if (score == 0)
749c10f9 201 return t;
afcf1242
TT
202
203 if (score < cand_score) {
204 cand = t;
205 cand_score = score;
206 }
1da177e4 207 }
e1a80002 208
8d5b2c08 209 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
749c10f9
TT
210 if (remote != t->parms.iph.daddr ||
211 key != t->parms.i_key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->dev->type != ARPHRD_IPGRE &&
216 t->dev->type != dev_type)
217 continue;
218
afcf1242 219 score = 0;
749c10f9 220 if (t->parms.link != link)
afcf1242 221 score |= 1;
749c10f9 222 if (t->dev->type != dev_type)
afcf1242
TT
223 score |= 2;
224 if (score == 0)
749c10f9 225 return t;
afcf1242
TT
226
227 if (score < cand_score) {
228 cand = t;
229 cand_score = score;
230 }
1da177e4 231 }
e1a80002 232
8d5b2c08 233 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
749c10f9
TT
234 if ((local != t->parms.iph.saddr &&
235 (local != t->parms.iph.daddr ||
236 !ipv4_is_multicast(local))) ||
237 key != t->parms.i_key ||
238 !(t->dev->flags & IFF_UP))
239 continue;
240
241 if (t->dev->type != ARPHRD_IPGRE &&
242 t->dev->type != dev_type)
243 continue;
244
afcf1242 245 score = 0;
749c10f9 246 if (t->parms.link != link)
afcf1242 247 score |= 1;
749c10f9 248 if (t->dev->type != dev_type)
afcf1242
TT
249 score |= 2;
250 if (score == 0)
749c10f9 251 return t;
afcf1242
TT
252
253 if (score < cand_score) {
254 cand = t;
255 cand_score = score;
256 }
1da177e4 257 }
e1a80002 258
8d5b2c08 259 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
749c10f9
TT
260 if (t->parms.i_key != key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IPGRE &&
265 t->dev->type != dev_type)
266 continue;
267
afcf1242 268 score = 0;
749c10f9 269 if (t->parms.link != link)
afcf1242 270 score |= 1;
749c10f9 271 if (t->dev->type != dev_type)
afcf1242
TT
272 score |= 2;
273 if (score == 0)
749c10f9 274 return t;
afcf1242
TT
275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
1da177e4
LT
280 }
281
afcf1242
TT
282 if (cand != NULL)
283 return cand;
e1a80002 284
8d5b2c08
ED
285 dev = ign->fb_tunnel_dev;
286 if (dev->flags & IFF_UP)
287 return netdev_priv(dev);
749c10f9 288
1da177e4
LT
289 return NULL;
290}
291
1507850b 292static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
f57e7d5a 293 struct ip_tunnel_parm *parms)
1da177e4 294{
5056a1ef
YH
295 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key;
1507850b 298 unsigned int h = HASH(key);
1da177e4
LT
299 int prio = 0;
300
301 if (local)
302 prio |= 1;
f97c1e0c 303 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
304 prio |= 2;
305 h ^= HASH(remote);
306 }
307
eb8ce741 308 return &ign->tunnels[prio][h];
1da177e4
LT
309}
310
1507850b 311static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
f57e7d5a 312 struct ip_tunnel *t)
5056a1ef 313{
f57e7d5a 314 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
315}
316
f57e7d5a 317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 318{
1507850b 319 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
1da177e4 320
1507850b 321 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
8d5b2c08 322 rcu_assign_pointer(*tp, t);
1da177e4
LT
323}
324
f57e7d5a 325static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 326{
1507850b
ED
327 struct ip_tunnel __rcu **tp;
328 struct ip_tunnel *iter;
329
330 for (tp = ipgre_bucket(ign, t);
331 (iter = rtnl_dereference(*tp)) != NULL;
332 tp = &iter->next) {
333 if (t == iter) {
334 rcu_assign_pointer(*tp, t->next);
1da177e4
LT
335 break;
336 }
337 }
338}
339
e1a80002
HX
340static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341 struct ip_tunnel_parm *parms,
342 int type)
1da177e4 343{
d5a0a1e3
AV
344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
749c10f9 347 int link = parms->link;
1507850b
ED
348 struct ip_tunnel *t;
349 struct ip_tunnel __rcu **tp;
e1a80002
HX
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351
1507850b
ED
352 for (tp = __ipgre_bucket(ign, parms);
353 (t = rtnl_dereference(*tp)) != NULL;
354 tp = &t->next)
e1a80002
HX
355 if (local == t->parms.iph.saddr &&
356 remote == t->parms.iph.daddr &&
357 key == t->parms.i_key &&
749c10f9 358 link == t->parms.link &&
e1a80002
HX
359 type == t->dev->type)
360 break;
361
362 return t;
363}
364
1507850b 365static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
e1a80002
HX
366 struct ip_tunnel_parm *parms, int create)
367{
368 struct ip_tunnel *t, *nt;
1da177e4 369 struct net_device *dev;
1da177e4 370 char name[IFNAMSIZ];
f57e7d5a 371 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 372
e1a80002
HX
373 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
374 if (t || !create)
375 return t;
1da177e4
LT
376
377 if (parms->name[0])
378 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
379 else
380 sprintf(name, "gre%%d");
1da177e4
LT
381
382 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
383 if (!dev)
384 return NULL;
385
0b67eceb
PE
386 dev_net_set(dev, net);
387
b37d428b
PE
388 if (strchr(name, '%')) {
389 if (dev_alloc_name(dev, name) < 0)
390 goto failed_free;
391 }
392
2941a486 393 nt = netdev_priv(dev);
1da177e4 394 nt->parms = *parms;
c19e654d 395 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 396
42aa9162
HX
397 dev->mtu = ipgre_tunnel_bind_dev(dev);
398
b37d428b
PE
399 if (register_netdevice(dev) < 0)
400 goto failed_free;
1da177e4 401
1da177e4 402 dev_hold(dev);
f57e7d5a 403 ipgre_tunnel_link(ign, nt);
1da177e4
LT
404 return nt;
405
b37d428b
PE
406failed_free:
407 free_netdev(dev);
1da177e4
LT
408 return NULL;
409}
410
411static void ipgre_tunnel_uninit(struct net_device *dev)
412{
f57e7d5a
PE
413 struct net *net = dev_net(dev);
414 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
415
416 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
417 dev_put(dev);
418}
419
420
421static void ipgre_err(struct sk_buff *skb, u32 info)
422{
1da177e4 423
071f92d0 424/* All the routers (except for Linux) return only
1da177e4
LT
425 8 bytes of packet payload. It means, that precise relaying of
426 ICMP in the real Internet is absolutely infeasible.
427
428 Moreover, Cisco "wise men" put GRE key to the third word
429 in GRE header. It makes impossible maintaining even soft state for keyed
430 GRE tunnels with enabled checksum. Tell them "thank you".
431
432 Well, I wonder, rfc1812 was written by Cisco employee,
433 what the hell these idiots break standrads established
434 by themself???
435 */
436
6ed2533e 437 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 438 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 439 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
440 const int type = icmp_hdr(skb)->type;
441 const int code = icmp_hdr(skb)->code;
1da177e4 442 struct ip_tunnel *t;
d5a0a1e3 443 __be16 flags;
1da177e4
LT
444
445 flags = p[0];
446 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
447 if (flags&(GRE_VERSION|GRE_ROUTING))
448 return;
449 if (flags&GRE_KEY) {
450 grehlen += 4;
451 if (flags&GRE_CSUM)
452 grehlen += 4;
453 }
454 }
455
456 /* If only 8 bytes returned, keyed message will be dropped here */
457 if (skb_headlen(skb) < grehlen)
458 return;
459
460 switch (type) {
461 default:
462 case ICMP_PARAMETERPROB:
463 return;
464
465 case ICMP_DEST_UNREACH:
466 switch (code) {
467 case ICMP_SR_FAILED:
468 case ICMP_PORT_UNREACH:
469 /* Impossible event. */
470 return;
471 case ICMP_FRAG_NEEDED:
472 /* Soft state for pmtu is maintained by IP core. */
473 return;
474 default:
475 /* All others are translated to HOST_UNREACH.
476 rfc2003 contains "deep thoughts" about NET_UNREACH,
477 I believe they are just ether pollution. --ANK
478 */
479 break;
480 }
481 break;
482 case ICMP_TIME_EXCEEDED:
483 if (code != ICMP_EXC_TTL)
484 return;
485 break;
486 }
487
8d5b2c08 488 rcu_read_lock();
749c10f9 489 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
490 flags & GRE_KEY ?
491 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
492 p[1]);
f97c1e0c
JP
493 if (t == NULL || t->parms.iph.daddr == 0 ||
494 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
495 goto out;
496
497 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
498 goto out;
499
da6185d8 500 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
501 t->err_count++;
502 else
503 t->err_count = 1;
504 t->err_time = jiffies;
505out:
8d5b2c08 506 rcu_read_unlock();
1da177e4
LT
507}
508
509static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
510{
511 if (INET_ECN_is_ce(iph->tos)) {
512 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 513 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 514 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 515 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
516 }
517 }
518}
519
520static inline u8
521ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
522{
523 u8 inner = 0;
524 if (skb->protocol == htons(ETH_P_IP))
525 inner = old_iph->tos;
526 else if (skb->protocol == htons(ETH_P_IPV6))
527 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
528 return INET_ECN_encapsulate(tos, inner);
529}
530
531static int ipgre_rcv(struct sk_buff *skb)
532{
533 struct iphdr *iph;
534 u8 *h;
d5a0a1e3 535 __be16 flags;
d3bc23e7 536 __sum16 csum = 0;
d5a0a1e3 537 __be32 key = 0;
1da177e4
LT
538 u32 seqno = 0;
539 struct ip_tunnel *tunnel;
540 int offset = 4;
e1a80002 541 __be16 gre_proto;
1da177e4
LT
542
543 if (!pskb_may_pull(skb, 16))
544 goto drop_nolock;
545
eddc9ec5 546 iph = ip_hdr(skb);
1da177e4 547 h = skb->data;
d5a0a1e3 548 flags = *(__be16*)h;
1da177e4
LT
549
550 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
551 /* - Version must be 0.
552 - We do not support routing headers.
553 */
554 if (flags&(GRE_VERSION|GRE_ROUTING))
555 goto drop_nolock;
556
557 if (flags&GRE_CSUM) {
fb286bb2 558 switch (skb->ip_summed) {
84fa7933 559 case CHECKSUM_COMPLETE:
d3bc23e7 560 csum = csum_fold(skb->csum);
fb286bb2
HX
561 if (!csum)
562 break;
563 /* fall through */
564 case CHECKSUM_NONE:
565 skb->csum = 0;
566 csum = __skb_checksum_complete(skb);
84fa7933 567 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
568 }
569 offset += 4;
570 }
571 if (flags&GRE_KEY) {
d5a0a1e3 572 key = *(__be32*)(h + offset);
1da177e4
LT
573 offset += 4;
574 }
575 if (flags&GRE_SEQ) {
d5a0a1e3 576 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
577 offset += 4;
578 }
579 }
580
e1a80002
HX
581 gre_proto = *(__be16 *)(h + 2);
582
8d5b2c08 583 rcu_read_lock();
749c10f9 584 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
585 iph->saddr, iph->daddr, key,
586 gre_proto))) {
addd68eb
PE
587 struct net_device_stats *stats = &tunnel->dev->stats;
588
1da177e4
LT
589 secpath_reset(skb);
590
e1a80002 591 skb->protocol = gre_proto;
1da177e4
LT
592 /* WCCP version 1 and 2 protocol decoding.
593 * - Change protocol to IP
594 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
595 */
e1a80002 596 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 597 skb->protocol = htons(ETH_P_IP);
e905a9ed 598 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
599 offset += 4;
600 }
601
1d069167 602 skb->mac_header = skb->network_header;
4209fb60 603 __pskb_pull(skb, offset);
9c70220b 604 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
605 skb->pkt_type = PACKET_HOST;
606#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 607 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 608 /* Looped back packet, drop it! */
511c3f92 609 if (skb_rtable(skb)->fl.iif == 0)
1da177e4 610 goto drop;
addd68eb 611 stats->multicast++;
1da177e4
LT
612 skb->pkt_type = PACKET_BROADCAST;
613 }
614#endif
615
616 if (((flags&GRE_CSUM) && csum) ||
617 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
618 stats->rx_crc_errors++;
619 stats->rx_errors++;
1da177e4
LT
620 goto drop;
621 }
622 if (tunnel->parms.i_flags&GRE_SEQ) {
623 if (!(flags&GRE_SEQ) ||
624 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
625 stats->rx_fifo_errors++;
626 stats->rx_errors++;
1da177e4
LT
627 goto drop;
628 }
629 tunnel->i_seqno = seqno + 1;
630 }
e1a80002
HX
631
632 /* Warning: All skb pointers will be invalidated! */
633 if (tunnel->dev->type == ARPHRD_ETHER) {
634 if (!pskb_may_pull(skb, ETH_HLEN)) {
635 stats->rx_length_errors++;
636 stats->rx_errors++;
637 goto drop;
638 }
639
640 iph = ip_hdr(skb);
641 skb->protocol = eth_type_trans(skb, tunnel->dev);
642 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
643 }
644
d19d56dd 645 skb_tunnel_rx(skb, tunnel->dev);
e1a80002
HX
646
647 skb_reset_network_header(skb);
1da177e4 648 ipgre_ecn_decapsulate(iph, skb);
e1a80002 649
1da177e4 650 netif_rx(skb);
8d5b2c08 651 rcu_read_unlock();
1da177e4
LT
652 return(0);
653 }
45af08be 654 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
655
656drop:
8d5b2c08 657 rcu_read_unlock();
1da177e4
LT
658drop_nolock:
659 kfree_skb(skb);
660 return(0);
661}
662
6fef4c0c 663static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1da177e4 664{
2941a486 665 struct ip_tunnel *tunnel = netdev_priv(dev);
0bfbedb1
ED
666 struct net_device_stats *stats = &dev->stats;
667 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
eddc9ec5 668 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
669 struct iphdr *tiph;
670 u8 tos;
d5a0a1e3 671 __be16 df;
1da177e4 672 struct rtable *rt; /* Route to the other host */
1507850b 673 struct net_device *tdev; /* Device to other host */
1da177e4 674 struct iphdr *iph; /* Our new IP header */
c2636b4d 675 unsigned int max_headroom; /* The extra header space needed */
1da177e4 676 int gre_hlen;
d5a0a1e3 677 __be32 dst;
1da177e4
LT
678 int mtu;
679
e1a80002
HX
680 if (dev->type == ARPHRD_ETHER)
681 IPCB(skb)->flags = 0;
682
683 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 684 gre_hlen = 0;
6ed2533e 685 tiph = (struct iphdr *)skb->data;
1da177e4
LT
686 } else {
687 gre_hlen = tunnel->hlen;
688 tiph = &tunnel->parms.iph;
689 }
690
691 if ((dst = tiph->daddr) == 0) {
692 /* NBMA tunnel */
693
adf30907 694 if (skb_dst(skb) == NULL) {
addd68eb 695 stats->tx_fifo_errors++;
1da177e4
LT
696 goto tx_error;
697 }
698
699 if (skb->protocol == htons(ETH_P_IP)) {
511c3f92 700 rt = skb_rtable(skb);
1da177e4
LT
701 if ((dst = rt->rt_gateway) == 0)
702 goto tx_error_icmp;
703 }
704#ifdef CONFIG_IPV6
705 else if (skb->protocol == htons(ETH_P_IPV6)) {
706 struct in6_addr *addr6;
707 int addr_type;
adf30907 708 struct neighbour *neigh = skb_dst(skb)->neighbour;
1da177e4
LT
709
710 if (neigh == NULL)
711 goto tx_error;
712
6ed2533e 713 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
714 addr_type = ipv6_addr_type(addr6);
715
716 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 717 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
718 addr_type = ipv6_addr_type(addr6);
719 }
720
721 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
722 goto tx_error_icmp;
723
724 dst = addr6->s6_addr32[3];
725 }
726#endif
727 else
728 goto tx_error;
729 }
730
731 tos = tiph->tos;
ee686ca9
AJ
732 if (tos == 1) {
733 tos = 0;
1da177e4
LT
734 if (skb->protocol == htons(ETH_P_IP))
735 tos = old_iph->tos;
dd4ba83d
SH
736 else if (skb->protocol == htons(ETH_P_IPV6))
737 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
1da177e4
LT
738 }
739
740 {
741 struct flowi fl = { .oif = tunnel->parms.link,
742 .nl_u = { .ip4_u =
743 { .daddr = dst,
744 .saddr = tiph->saddr,
745 .tos = RT_TOS(tos) } },
746 .proto = IPPROTO_GRE };
96635522 747 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 748 stats->tx_carrier_errors++;
1da177e4
LT
749 goto tx_error;
750 }
751 }
d8d1f30b 752 tdev = rt->dst.dev;
1da177e4
LT
753
754 if (tdev == dev) {
755 ip_rt_put(rt);
addd68eb 756 stats->collisions++;
1da177e4
LT
757 goto tx_error;
758 }
759
760 df = tiph->frag_off;
761 if (df)
d8d1f30b 762 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
1da177e4 763 else
adf30907 764 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
1da177e4 765
adf30907
ED
766 if (skb_dst(skb))
767 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1da177e4
LT
768
769 if (skb->protocol == htons(ETH_P_IP)) {
770 df |= (old_iph->frag_off&htons(IP_DF));
771
772 if ((old_iph->frag_off&htons(IP_DF)) &&
773 mtu < ntohs(old_iph->tot_len)) {
774 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
775 ip_rt_put(rt);
776 goto tx_error;
777 }
778 }
779#ifdef CONFIG_IPV6
780 else if (skb->protocol == htons(ETH_P_IPV6)) {
adf30907 781 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
1da177e4 782
adf30907 783 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
784 if ((tunnel->parms.iph.daddr &&
785 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
786 rt6->rt6i_dst.plen == 128) {
787 rt6->rt6i_flags |= RTF_MODIFIED;
adf30907 788 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
1da177e4
LT
789 }
790 }
791
792 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
3ffe533c 793 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1da177e4
LT
794 ip_rt_put(rt);
795 goto tx_error;
796 }
797 }
798#endif
799
800 if (tunnel->err_count > 0) {
da6185d8
WY
801 if (time_before(jiffies,
802 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
1da177e4
LT
803 tunnel->err_count--;
804
805 dst_link_failure(skb);
806 } else
807 tunnel->err_count = 0;
808 }
809
d8d1f30b 810 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
1da177e4 811
cfbba49d
PM
812 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
813 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4 814 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
243aad83
TT
815 if (max_headroom > dev->needed_headroom)
816 dev->needed_headroom = max_headroom;
1da177e4
LT
817 if (!new_skb) {
818 ip_rt_put(rt);
0bfbedb1 819 txq->tx_dropped++;
1da177e4 820 dev_kfree_skb(skb);
6ed10654 821 return NETDEV_TX_OK;
1da177e4
LT
822 }
823 if (skb->sk)
824 skb_set_owner_w(new_skb, skb->sk);
825 dev_kfree_skb(skb);
826 skb = new_skb;
eddc9ec5 827 old_iph = ip_hdr(skb);
1da177e4
LT
828 }
829
64194c31 830 skb_reset_transport_header(skb);
e2d1bca7
ACM
831 skb_push(skb, gre_hlen);
832 skb_reset_network_header(skb);
1da177e4 833 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
834 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
835 IPSKB_REROUTED);
adf30907 836 skb_dst_drop(skb);
d8d1f30b 837 skb_dst_set(skb, &rt->dst);
1da177e4
LT
838
839 /*
840 * Push down and install the IPIP header.
841 */
842
eddc9ec5 843 iph = ip_hdr(skb);
1da177e4
LT
844 iph->version = 4;
845 iph->ihl = sizeof(struct iphdr) >> 2;
846 iph->frag_off = df;
847 iph->protocol = IPPROTO_GRE;
848 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
849 iph->daddr = rt->rt_dst;
850 iph->saddr = rt->rt_src;
851
852 if ((iph->ttl = tiph->ttl) == 0) {
853 if (skb->protocol == htons(ETH_P_IP))
854 iph->ttl = old_iph->ttl;
855#ifdef CONFIG_IPV6
856 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 857 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
858#endif
859 else
d8d1f30b 860 iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
1da177e4
LT
861 }
862
e1a80002
HX
863 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
864 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
865 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
866
867 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 868 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
869
870 if (tunnel->parms.o_flags&GRE_SEQ) {
871 ++tunnel->o_seqno;
872 *ptr = htonl(tunnel->o_seqno);
873 ptr--;
874 }
875 if (tunnel->parms.o_flags&GRE_KEY) {
876 *ptr = tunnel->parms.o_key;
877 ptr--;
878 }
879 if (tunnel->parms.o_flags&GRE_CSUM) {
880 *ptr = 0;
5f92a738 881 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
882 }
883 }
884
885 nf_reset(skb);
886
887 IPTUNNEL_XMIT();
6ed10654 888 return NETDEV_TX_OK;
1da177e4
LT
889
890tx_error_icmp:
891 dst_link_failure(skb);
892
893tx_error:
894 stats->tx_errors++;
895 dev_kfree_skb(skb);
6ed10654 896 return NETDEV_TX_OK;
1da177e4
LT
897}
898
42aa9162 899static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
900{
901 struct net_device *tdev = NULL;
902 struct ip_tunnel *tunnel;
903 struct iphdr *iph;
904 int hlen = LL_MAX_HEADER;
905 int mtu = ETH_DATA_LEN;
906 int addend = sizeof(struct iphdr) + 4;
907
908 tunnel = netdev_priv(dev);
909 iph = &tunnel->parms.iph;
910
c95b819a 911 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
912
913 if (iph->daddr) {
914 struct flowi fl = { .oif = tunnel->parms.link,
915 .nl_u = { .ip4_u =
916 { .daddr = iph->daddr,
917 .saddr = iph->saddr,
918 .tos = RT_TOS(iph->tos) } },
919 .proto = IPPROTO_GRE };
920 struct rtable *rt;
96635522 921 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
d8d1f30b 922 tdev = rt->dst.dev;
ee34c1eb
MS
923 ip_rt_put(rt);
924 }
e1a80002
HX
925
926 if (dev->type != ARPHRD_ETHER)
927 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
928 }
929
930 if (!tdev && tunnel->parms.link)
96635522 931 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
932
933 if (tdev) {
c95b819a 934 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
935 mtu = tdev->mtu;
936 }
937 dev->iflink = tunnel->parms.link;
938
939 /* Precalculate GRE options length */
940 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
941 if (tunnel->parms.o_flags&GRE_CSUM)
942 addend += 4;
943 if (tunnel->parms.o_flags&GRE_KEY)
944 addend += 4;
945 if (tunnel->parms.o_flags&GRE_SEQ)
946 addend += 4;
947 }
c95b819a 948 dev->needed_headroom = addend + hlen;
8cdb0456 949 mtu -= dev->hard_header_len + addend;
42aa9162
HX
950
951 if (mtu < 68)
952 mtu = 68;
953
ee34c1eb
MS
954 tunnel->hlen = addend;
955
42aa9162 956 return mtu;
ee34c1eb
MS
957}
958
1da177e4
LT
959static int
960ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
961{
962 int err = 0;
963 struct ip_tunnel_parm p;
964 struct ip_tunnel *t;
f57e7d5a
PE
965 struct net *net = dev_net(dev);
966 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
967
968 switch (cmd) {
969 case SIOCGETTUNNEL:
970 t = NULL;
7daa0004 971 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
972 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
973 err = -EFAULT;
974 break;
975 }
f57e7d5a 976 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
977 }
978 if (t == NULL)
2941a486 979 t = netdev_priv(dev);
1da177e4
LT
980 memcpy(&p, &t->parms, sizeof(p));
981 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
982 err = -EFAULT;
983 break;
984
985 case SIOCADDTUNNEL:
986 case SIOCCHGTUNNEL:
987 err = -EPERM;
988 if (!capable(CAP_NET_ADMIN))
989 goto done;
990
991 err = -EFAULT;
992 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
993 goto done;
994
995 err = -EINVAL;
996 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
997 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
998 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
999 goto done;
1000 if (p.iph.ttl)
1001 p.iph.frag_off |= htons(IP_DF);
1002
1003 if (!(p.i_flags&GRE_KEY))
1004 p.i_key = 0;
1005 if (!(p.o_flags&GRE_KEY))
1006 p.o_key = 0;
1007
f57e7d5a 1008 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1009
7daa0004 1010 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1011 if (t != NULL) {
1012 if (t->dev != dev) {
1013 err = -EEXIST;
1014 break;
1015 }
1016 } else {
1507850b 1017 unsigned int nflags = 0;
1da177e4 1018
2941a486 1019 t = netdev_priv(dev);
1da177e4 1020
f97c1e0c 1021 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1022 nflags = IFF_BROADCAST;
1023 else if (p.iph.daddr)
1024 nflags = IFF_POINTOPOINT;
1025
1026 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1027 err = -EINVAL;
1028 break;
1029 }
f57e7d5a 1030 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1031 t->parms.iph.saddr = p.iph.saddr;
1032 t->parms.iph.daddr = p.iph.daddr;
1033 t->parms.i_key = p.i_key;
1034 t->parms.o_key = p.o_key;
1035 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1036 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1037 ipgre_tunnel_link(ign, t);
1da177e4
LT
1038 netdev_state_change(dev);
1039 }
1040 }
1041
1042 if (t) {
1043 err = 0;
1044 if (cmd == SIOCCHGTUNNEL) {
1045 t->parms.iph.ttl = p.iph.ttl;
1046 t->parms.iph.tos = p.iph.tos;
1047 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
1048 if (t->parms.link != p.link) {
1049 t->parms.link = p.link;
42aa9162 1050 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1051 netdev_state_change(dev);
1052 }
1da177e4
LT
1053 }
1054 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1055 err = -EFAULT;
1056 } else
1057 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1058 break;
1059
1060 case SIOCDELTUNNEL:
1061 err = -EPERM;
1062 if (!capable(CAP_NET_ADMIN))
1063 goto done;
1064
7daa0004 1065 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1066 err = -EFAULT;
1067 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1068 goto done;
1069 err = -ENOENT;
f57e7d5a 1070 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1071 goto done;
1072 err = -EPERM;
7daa0004 1073 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1074 goto done;
1075 dev = t->dev;
1076 }
22f8cde5
SH
1077 unregister_netdevice(dev);
1078 err = 0;
1da177e4
LT
1079 break;
1080
1081 default:
1082 err = -EINVAL;
1083 }
1084
1085done:
1086 return err;
1087}
1088
1da177e4
LT
1089static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1090{
2941a486 1091 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1092 if (new_mtu < 68 ||
1093 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1094 return -EINVAL;
1095 dev->mtu = new_mtu;
1096 return 0;
1097}
1098
1da177e4
LT
1099/* Nice toy. Unfortunately, useless in real life :-)
1100 It allows to construct virtual multiprotocol broadcast "LAN"
1101 over the Internet, provided multicast routing is tuned.
1102
1103
1104 I have no idea was this bicycle invented before me,
1105 so that I had to set ARPHRD_IPGRE to a random value.
1106 I have an impression, that Cisco could make something similar,
1107 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1108
1da177e4
LT
1109 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1110 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1111
1112 ping -t 255 224.66.66.66
1113
1114 If nobody answers, mbone does not work.
1115
1116 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1117 ip addr add 10.66.66.<somewhat>/24 dev Universe
1118 ifconfig Universe up
1119 ifconfig Universe add fe80::<Your_real_addr>/10
1120 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1121 ftp 10.66.66.66
1122 ...
1123 ftp fec0:6666:6666::193.233.7.65
1124 ...
1125
1126 */
1127
3b04ddde
SH
1128static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1129 unsigned short type,
1507850b 1130 const void *daddr, const void *saddr, unsigned int len)
1da177e4 1131{
2941a486 1132 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1133 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1134 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1135
1136 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1137 p[0] = t->parms.o_flags;
1138 p[1] = htons(type);
1139
1140 /*
e905a9ed 1141 * Set the source hardware address.
1da177e4 1142 */
e905a9ed 1143
1da177e4
LT
1144 if (saddr)
1145 memcpy(&iph->saddr, saddr, 4);
6d55cb91 1146 if (daddr)
1da177e4 1147 memcpy(&iph->daddr, daddr, 4);
6d55cb91 1148 if (iph->daddr)
1da177e4 1149 return t->hlen;
e905a9ed 1150
1da177e4
LT
1151 return -t->hlen;
1152}
1153
6a5f44d7
TT
1154static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1155{
6ed2533e 1156 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1157 memcpy(haddr, &iph->saddr, 4);
1158 return 4;
1159}
1160
3b04ddde
SH
1161static const struct header_ops ipgre_header_ops = {
1162 .create = ipgre_header,
6a5f44d7 1163 .parse = ipgre_header_parse,
3b04ddde
SH
1164};
1165
6a5f44d7 1166#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1167static int ipgre_open(struct net_device *dev)
1168{
2941a486 1169 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1170
f97c1e0c 1171 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1172 struct flowi fl = { .oif = t->parms.link,
1173 .nl_u = { .ip4_u =
1174 { .daddr = t->parms.iph.daddr,
1175 .saddr = t->parms.iph.saddr,
1176 .tos = RT_TOS(t->parms.iph.tos) } },
1177 .proto = IPPROTO_GRE };
1178 struct rtable *rt;
96635522 1179 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4 1180 return -EADDRNOTAVAIL;
d8d1f30b 1181 dev = rt->dst.dev;
1da177e4 1182 ip_rt_put(rt);
e5ed6399 1183 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1184 return -EADDRNOTAVAIL;
1185 t->mlink = dev->ifindex;
e5ed6399 1186 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1187 }
1188 return 0;
1189}
1190
1191static int ipgre_close(struct net_device *dev)
1192{
2941a486 1193 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1194
f97c1e0c 1195 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1196 struct in_device *in_dev;
c346dca1 1197 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1198 if (in_dev) {
1199 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1200 in_dev_put(in_dev);
1201 }
1202 }
1203 return 0;
1204}
1205
1206#endif
1207
b8c26a33
SH
1208static const struct net_device_ops ipgre_netdev_ops = {
1209 .ndo_init = ipgre_tunnel_init,
1210 .ndo_uninit = ipgre_tunnel_uninit,
1211#ifdef CONFIG_NET_IPGRE_BROADCAST
1212 .ndo_open = ipgre_open,
1213 .ndo_stop = ipgre_close,
1214#endif
1215 .ndo_start_xmit = ipgre_tunnel_xmit,
1216 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1217 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1218};
1219
1da177e4
LT
1220static void ipgre_tunnel_setup(struct net_device *dev)
1221{
b8c26a33 1222 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1223 dev->destructor = free_netdev;
1da177e4
LT
1224
1225 dev->type = ARPHRD_IPGRE;
c95b819a 1226 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1227 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1228 dev->flags = IFF_NOARP;
1229 dev->iflink = 0;
1230 dev->addr_len = 4;
0b67eceb 1231 dev->features |= NETIF_F_NETNS_LOCAL;
108bfa89 1232 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1da177e4
LT
1233}
1234
1235static int ipgre_tunnel_init(struct net_device *dev)
1236{
1da177e4
LT
1237 struct ip_tunnel *tunnel;
1238 struct iphdr *iph;
1da177e4 1239
2941a486 1240 tunnel = netdev_priv(dev);
1da177e4
LT
1241 iph = &tunnel->parms.iph;
1242
1243 tunnel->dev = dev;
1244 strcpy(tunnel->parms.name, dev->name);
1245
1246 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1247 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1248
1da177e4 1249 if (iph->daddr) {
1da177e4 1250#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1251 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1252 if (!iph->saddr)
1253 return -EINVAL;
1254 dev->flags = IFF_BROADCAST;
3b04ddde 1255 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1256 }
1257#endif
ee34c1eb 1258 } else
6a5f44d7 1259 dev->header_ops = &ipgre_header_ops;
1da177e4 1260
1da177e4
LT
1261 return 0;
1262}
1263
b8c26a33 1264static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1265{
2941a486 1266 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1267 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1268 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1269
1270 tunnel->dev = dev;
1271 strcpy(tunnel->parms.name, dev->name);
1272
1273 iph->version = 4;
1274 iph->protocol = IPPROTO_GRE;
1275 iph->ihl = 5;
1276 tunnel->hlen = sizeof(struct iphdr) + 4;
1277
1278 dev_hold(dev);
1507850b 1279 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1da177e4
LT
1280}
1281
1282
00959ade
DK
1283static const struct gre_protocol ipgre_protocol = {
1284 .handler = ipgre_rcv,
1285 .err_handler = ipgre_err,
1da177e4
LT
1286};
1287
eef6dd65 1288static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
eb8ce741
PE
1289{
1290 int prio;
1291
1292 for (prio = 0; prio < 4; prio++) {
1293 int h;
1294 for (h = 0; h < HASH_SIZE; h++) {
1507850b
ED
1295 struct ip_tunnel *t;
1296
1297 t = rtnl_dereference(ign->tunnels[prio][h]);
eef6dd65
ED
1298
1299 while (t != NULL) {
1300 unregister_netdevice_queue(t->dev, head);
1507850b 1301 t = rtnl_dereference(t->next);
eef6dd65 1302 }
eb8ce741
PE
1303 }
1304 }
1305}
1306
2c8c1e72 1307static int __net_init ipgre_init_net(struct net *net)
59a4c759 1308{
cfb8fbf2 1309 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
59a4c759 1310 int err;
59a4c759 1311
7daa0004
PE
1312 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1313 ipgre_tunnel_setup);
1314 if (!ign->fb_tunnel_dev) {
1315 err = -ENOMEM;
1316 goto err_alloc_dev;
1317 }
be77e593 1318 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1319
b8c26a33 1320 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1321 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1322
1323 if ((err = register_netdev(ign->fb_tunnel_dev)))
1324 goto err_reg_dev;
1325
59a4c759
PE
1326 return 0;
1327
7daa0004
PE
1328err_reg_dev:
1329 free_netdev(ign->fb_tunnel_dev);
1330err_alloc_dev:
59a4c759
PE
1331 return err;
1332}
1333
2c8c1e72 1334static void __net_exit ipgre_exit_net(struct net *net)
59a4c759
PE
1335{
1336 struct ipgre_net *ign;
eef6dd65 1337 LIST_HEAD(list);
59a4c759
PE
1338
1339 ign = net_generic(net, ipgre_net_id);
7daa0004 1340 rtnl_lock();
eef6dd65
ED
1341 ipgre_destroy_tunnels(ign, &list);
1342 unregister_netdevice_many(&list);
7daa0004 1343 rtnl_unlock();
59a4c759
PE
1344}
1345
1346static struct pernet_operations ipgre_net_ops = {
1347 .init = ipgre_init_net,
1348 .exit = ipgre_exit_net,
cfb8fbf2
EB
1349 .id = &ipgre_net_id,
1350 .size = sizeof(struct ipgre_net),
59a4c759 1351};
1da177e4 1352
c19e654d
HX
1353static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1354{
1355 __be16 flags;
1356
1357 if (!data)
1358 return 0;
1359
1360 flags = 0;
1361 if (data[IFLA_GRE_IFLAGS])
1362 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1363 if (data[IFLA_GRE_OFLAGS])
1364 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1365 if (flags & (GRE_VERSION|GRE_ROUTING))
1366 return -EINVAL;
1367
1368 return 0;
1369}
1370
e1a80002
HX
1371static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1372{
1373 __be32 daddr;
1374
1375 if (tb[IFLA_ADDRESS]) {
1376 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1377 return -EINVAL;
1378 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1379 return -EADDRNOTAVAIL;
1380 }
1381
1382 if (!data)
1383 goto out;
1384
1385 if (data[IFLA_GRE_REMOTE]) {
1386 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1387 if (!daddr)
1388 return -EINVAL;
1389 }
1390
1391out:
1392 return ipgre_tunnel_validate(tb, data);
1393}
1394
c19e654d
HX
1395static void ipgre_netlink_parms(struct nlattr *data[],
1396 struct ip_tunnel_parm *parms)
1397{
7bb82d92 1398 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1399
1400 parms->iph.protocol = IPPROTO_GRE;
1401
1402 if (!data)
1403 return;
1404
1405 if (data[IFLA_GRE_LINK])
1406 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1407
1408 if (data[IFLA_GRE_IFLAGS])
1409 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1410
1411 if (data[IFLA_GRE_OFLAGS])
1412 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1413
1414 if (data[IFLA_GRE_IKEY])
1415 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1416
1417 if (data[IFLA_GRE_OKEY])
1418 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1419
1420 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1421 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1422
1423 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1424 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1425
1426 if (data[IFLA_GRE_TTL])
1427 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1428
1429 if (data[IFLA_GRE_TOS])
1430 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1431
1432 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1433 parms->iph.frag_off = htons(IP_DF);
1434}
1435
e1a80002
HX
1436static int ipgre_tap_init(struct net_device *dev)
1437{
1438 struct ip_tunnel *tunnel;
1439
1440 tunnel = netdev_priv(dev);
1441
1442 tunnel->dev = dev;
1443 strcpy(tunnel->parms.name, dev->name);
1444
1445 ipgre_tunnel_bind_dev(dev);
1446
1447 return 0;
1448}
1449
b8c26a33
SH
1450static const struct net_device_ops ipgre_tap_netdev_ops = {
1451 .ndo_init = ipgre_tap_init,
1452 .ndo_uninit = ipgre_tunnel_uninit,
1453 .ndo_start_xmit = ipgre_tunnel_xmit,
1454 .ndo_set_mac_address = eth_mac_addr,
1455 .ndo_validate_addr = eth_validate_addr,
1456 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1457};
1458
e1a80002
HX
1459static void ipgre_tap_setup(struct net_device *dev)
1460{
1461
1462 ether_setup(dev);
1463
2e9526b3 1464 dev->netdev_ops = &ipgre_tap_netdev_ops;
e1a80002 1465 dev->destructor = free_netdev;
e1a80002
HX
1466
1467 dev->iflink = 0;
1468 dev->features |= NETIF_F_NETNS_LOCAL;
1469}
1470
81adee47 1471static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
c19e654d
HX
1472 struct nlattr *data[])
1473{
1474 struct ip_tunnel *nt;
1475 struct net *net = dev_net(dev);
1476 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1477 int mtu;
1478 int err;
1479
1480 nt = netdev_priv(dev);
1481 ipgre_netlink_parms(data, &nt->parms);
1482
e1a80002 1483 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1484 return -EEXIST;
1485
e1a80002
HX
1486 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1487 random_ether_addr(dev->dev_addr);
1488
c19e654d
HX
1489 mtu = ipgre_tunnel_bind_dev(dev);
1490 if (!tb[IFLA_MTU])
1491 dev->mtu = mtu;
1492
1493 err = register_netdevice(dev);
1494 if (err)
1495 goto out;
1496
1497 dev_hold(dev);
1498 ipgre_tunnel_link(ign, nt);
1499
1500out:
1501 return err;
1502}
1503
1504static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1505 struct nlattr *data[])
1506{
1507 struct ip_tunnel *t, *nt;
1508 struct net *net = dev_net(dev);
1509 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1510 struct ip_tunnel_parm p;
1511 int mtu;
1512
1513 if (dev == ign->fb_tunnel_dev)
1514 return -EINVAL;
1515
1516 nt = netdev_priv(dev);
1517 ipgre_netlink_parms(data, &p);
1518
1519 t = ipgre_tunnel_locate(net, &p, 0);
1520
1521 if (t) {
1522 if (t->dev != dev)
1523 return -EEXIST;
1524 } else {
c19e654d
HX
1525 t = nt;
1526
2e9526b3 1527 if (dev->type != ARPHRD_ETHER) {
1507850b 1528 unsigned int nflags = 0;
c19e654d 1529
2e9526b3
HX
1530 if (ipv4_is_multicast(p.iph.daddr))
1531 nflags = IFF_BROADCAST;
1532 else if (p.iph.daddr)
1533 nflags = IFF_POINTOPOINT;
1534
1535 if ((dev->flags ^ nflags) &
1536 (IFF_POINTOPOINT | IFF_BROADCAST))
1537 return -EINVAL;
1538 }
c19e654d
HX
1539
1540 ipgre_tunnel_unlink(ign, t);
1541 t->parms.iph.saddr = p.iph.saddr;
1542 t->parms.iph.daddr = p.iph.daddr;
1543 t->parms.i_key = p.i_key;
2e9526b3
HX
1544 if (dev->type != ARPHRD_ETHER) {
1545 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1546 memcpy(dev->broadcast, &p.iph.daddr, 4);
1547 }
c19e654d
HX
1548 ipgre_tunnel_link(ign, t);
1549 netdev_state_change(dev);
1550 }
1551
1552 t->parms.o_key = p.o_key;
1553 t->parms.iph.ttl = p.iph.ttl;
1554 t->parms.iph.tos = p.iph.tos;
1555 t->parms.iph.frag_off = p.iph.frag_off;
1556
1557 if (t->parms.link != p.link) {
1558 t->parms.link = p.link;
1559 mtu = ipgre_tunnel_bind_dev(dev);
1560 if (!tb[IFLA_MTU])
1561 dev->mtu = mtu;
1562 netdev_state_change(dev);
1563 }
1564
1565 return 0;
1566}
1567
1568static size_t ipgre_get_size(const struct net_device *dev)
1569{
1570 return
1571 /* IFLA_GRE_LINK */
1572 nla_total_size(4) +
1573 /* IFLA_GRE_IFLAGS */
1574 nla_total_size(2) +
1575 /* IFLA_GRE_OFLAGS */
1576 nla_total_size(2) +
1577 /* IFLA_GRE_IKEY */
1578 nla_total_size(4) +
1579 /* IFLA_GRE_OKEY */
1580 nla_total_size(4) +
1581 /* IFLA_GRE_LOCAL */
1582 nla_total_size(4) +
1583 /* IFLA_GRE_REMOTE */
1584 nla_total_size(4) +
1585 /* IFLA_GRE_TTL */
1586 nla_total_size(1) +
1587 /* IFLA_GRE_TOS */
1588 nla_total_size(1) +
1589 /* IFLA_GRE_PMTUDISC */
1590 nla_total_size(1) +
1591 0;
1592}
1593
1594static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1595{
1596 struct ip_tunnel *t = netdev_priv(dev);
1597 struct ip_tunnel_parm *p = &t->parms;
1598
1599 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1600 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1601 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1602 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1603 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1604 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1605 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1606 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1607 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1608 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1609
1610 return 0;
1611
1612nla_put_failure:
1613 return -EMSGSIZE;
1614}
1615
1616static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1617 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1618 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1619 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1620 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1621 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1622 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1623 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1624 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1625 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1626 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1627};
1628
1629static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1630 .kind = "gre",
1631 .maxtype = IFLA_GRE_MAX,
1632 .policy = ipgre_policy,
1633 .priv_size = sizeof(struct ip_tunnel),
1634 .setup = ipgre_tunnel_setup,
1635 .validate = ipgre_tunnel_validate,
1636 .newlink = ipgre_newlink,
1637 .changelink = ipgre_changelink,
1638 .get_size = ipgre_get_size,
1639 .fill_info = ipgre_fill_info,
1640};
1641
e1a80002
HX
1642static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1643 .kind = "gretap",
1644 .maxtype = IFLA_GRE_MAX,
1645 .policy = ipgre_policy,
1646 .priv_size = sizeof(struct ip_tunnel),
1647 .setup = ipgre_tap_setup,
1648 .validate = ipgre_tap_validate,
1649 .newlink = ipgre_newlink,
1650 .changelink = ipgre_changelink,
1651 .get_size = ipgre_get_size,
1652 .fill_info = ipgre_fill_info,
1653};
1654
1da177e4
LT
1655/*
1656 * And now the modules code and kernel interface.
1657 */
1658
1659static int __init ipgre_init(void)
1660{
1661 int err;
1662
1663 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1664
cfb8fbf2 1665 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1666 if (err < 0)
c2892f02
AD
1667 return err;
1668
00959ade 1669 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02
AD
1670 if (err < 0) {
1671 printk(KERN_INFO "ipgre init: can't add protocol\n");
1672 goto add_proto_failed;
1673 }
7daa0004 1674
c19e654d
HX
1675 err = rtnl_link_register(&ipgre_link_ops);
1676 if (err < 0)
1677 goto rtnl_link_failed;
1678
e1a80002
HX
1679 err = rtnl_link_register(&ipgre_tap_ops);
1680 if (err < 0)
1681 goto tap_ops_failed;
1682
c19e654d 1683out:
1da177e4 1684 return err;
c19e654d 1685
e1a80002
HX
1686tap_ops_failed:
1687 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1688rtnl_link_failed:
00959ade 1689 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02
AD
1690add_proto_failed:
1691 unregister_pernet_device(&ipgre_net_ops);
c19e654d 1692 goto out;
1da177e4
LT
1693}
1694
db44575f 1695static void __exit ipgre_fini(void)
1da177e4 1696{
e1a80002 1697 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1698 rtnl_link_unregister(&ipgre_link_ops);
00959ade 1699 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1da177e4 1700 printk(KERN_INFO "ipgre close: can't remove protocol\n");
c2892f02 1701 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1702}
1703
1704module_init(ipgre_init);
1705module_exit(ipgre_fini);
1706MODULE_LICENSE("GPL");
4d74f8ba
PM
1707MODULE_ALIAS_RTNL_LINK("gre");
1708MODULE_ALIAS_RTNL_LINK("gretap");