]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
e905a9ed 69 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
c19e654d 122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 125static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
eb8ce741
PE
/* Number of buckets in each of the tunnel hash tables below. */
129#define HASH_SIZE 16
130
59a4c759
PE
/* Id passed to net_generic() to fetch this module's struct ipgre_net. */
131static int ipgre_net_id;
/*
 * Per-network-namespace GRE state.
 *
 * tunnels[prio][bucket]: hash chains of configured tunnels.  The first
 *   index is the "prio" computed in __ipgre_bucket(): bit 0 is set when
 *   the tunnel has a local address, bit 1 when it has a non-multicast
 *   remote address.
 * fb_tunnel_dev: the fallback tunnel device, returned by
 *   ipgre_tunnel_lookup() when no configured tunnel matches and the
 *   device is up.
 */
132struct ipgre_net {
eb8ce741
PE
133 struct ip_tunnel *tunnels[4][HASH_SIZE];
134
7daa0004 135 struct net_device *fb_tunnel_dev;
59a4c759
PE
136};
137
1da177e4
LT
138/* Tunnel hash table */
139
140/*
141 4 hash tables:
142
143 3: (remote,local)
144 2: (remote,*)
145 1: (*,local)
146 0: (*,*)
147
148 We require exact key match i.e. if a key is present in packet
149 it will match only tunnel with the same key; if it is not present,
150 it will match only keyless tunnel.
151
152 All keyless packets, if not matched by configured keyless tunnels
153 will match fallback tunnel.
154 */
155
/*
 * Fold a 32-bit value (an address or a GRE key) into a 4-bit bucket
 * index by XOR-ing it with itself shifted right by four bits.
 *
 * The argument is fully parenthesised so that compound expressions such
 * as HASH(a | b) hash the whole expression.  Note that the argument is
 * still evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)

/* Readable names for the four hash tables inside struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote, local) */
#define tunnels_r	tunnels[2]	/* (remote, *)     */
#define tunnels_l	tunnels[1]	/* (*, local)      */
#define tunnels_wc	tunnels[0]	/* (*, *)          */
1da177e4
LT
162
/*
 * Reader/writer lock for the tunnel hash chains: taken for writing (BH
 * disabled) when a chain head/link is updated in ipgre_tunnel_link() and
 * ipgre_tunnel_unlink(), and for reading around ipgre_tunnel_lookup() in
 * the receive and ICMP error paths.
 */
163static DEFINE_RWLOCK(ipgre_lock);
164
165/* Given src, dst and key, find appropriate for input tunnel. */
166
749c10f9 167static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
e1a80002
HX
168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto)
1da177e4 170{
749c10f9
TT
171 struct net *net = dev_net(dev);
172 int link = dev->ifindex;
1da177e4
LT
173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
749c10f9 175 struct ip_tunnel *t, *sel[4] = { NULL, NULL, NULL, NULL };
7daa0004 176 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
177 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
178 ARPHRD_ETHER : ARPHRD_IPGRE;
749c10f9 179 int idx;
1da177e4 180
eb8ce741 181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
749c10f9
TT
182 if (local != t->parms.iph.saddr ||
183 remote != t->parms.iph.daddr ||
184 key != t->parms.i_key ||
185 !(t->dev->flags & IFF_UP))
186 continue;
187
188 if (t->dev->type != ARPHRD_IPGRE &&
189 t->dev->type != dev_type)
190 continue;
191
192 idx = 0;
193 if (t->parms.link != link)
194 idx |= 1;
195 if (t->dev->type != dev_type)
196 idx |= 2;
197 if (idx == 0)
198 return t;
199 if (sel[idx] == NULL)
200 sel[idx] = t;
1da177e4 201 }
e1a80002 202
eb8ce741 203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
749c10f9
TT
204 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP))
207 continue;
208
209 if (t->dev->type != ARPHRD_IPGRE &&
210 t->dev->type != dev_type)
211 continue;
212
213 idx = 0;
214 if (t->parms.link != link)
215 idx |= 1;
216 if (t->dev->type != dev_type)
217 idx |= 2;
218 if (idx == 0)
219 return t;
220 if (sel[idx] == NULL)
221 sel[idx] = t;
1da177e4 222 }
e1a80002 223
eb8ce741 224 for (t = ign->tunnels_l[h1]; t; t = t->next) {
749c10f9
TT
225 if ((local != t->parms.iph.saddr &&
226 (local != t->parms.iph.daddr ||
227 !ipv4_is_multicast(local))) ||
228 key != t->parms.i_key ||
229 !(t->dev->flags & IFF_UP))
230 continue;
231
232 if (t->dev->type != ARPHRD_IPGRE &&
233 t->dev->type != dev_type)
234 continue;
235
236 idx = 0;
237 if (t->parms.link != link)
238 idx |= 1;
239 if (t->dev->type != dev_type)
240 idx |= 2;
241 if (idx == 0)
242 return t;
243 if (sel[idx] == NULL)
244 sel[idx] = t;
1da177e4 245 }
e1a80002 246
eb8ce741 247 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
749c10f9
TT
248 if (t->parms.i_key != key ||
249 !(t->dev->flags & IFF_UP))
250 continue;
251
252 if (t->dev->type != ARPHRD_IPGRE &&
253 t->dev->type != dev_type)
254 continue;
255
256 idx = 0;
257 if (t->parms.link != link)
258 idx |= 1;
259 if (t->dev->type != dev_type)
260 idx |= 2;
261 if (idx == 0)
262 return t;
263 if (sel[idx] == NULL)
264 sel[idx] = t;
1da177e4
LT
265 }
266
749c10f9
TT
267 for (idx = 1; idx < ARRAY_SIZE(sel); idx++)
268 if (sel[idx] != NULL)
269 return sel[idx];
e1a80002 270
749c10f9 271 if (ign->fb_tunnel_dev->flags & IFF_UP)
7daa0004 272 return netdev_priv(ign->fb_tunnel_dev);
749c10f9 273
1da177e4
LT
274 return NULL;
275}
276
f57e7d5a
PE
277static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
278 struct ip_tunnel_parm *parms)
1da177e4 279{
5056a1ef
YH
280 __be32 remote = parms->iph.daddr;
281 __be32 local = parms->iph.saddr;
282 __be32 key = parms->i_key;
1da177e4
LT
283 unsigned h = HASH(key);
284 int prio = 0;
285
286 if (local)
287 prio |= 1;
f97c1e0c 288 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
289 prio |= 2;
290 h ^= HASH(remote);
291 }
292
eb8ce741 293 return &ign->tunnels[prio][h];
1da177e4
LT
294}
295
f57e7d5a
PE
296static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
297 struct ip_tunnel *t)
5056a1ef 298{
f57e7d5a 299 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
300}
301
f57e7d5a 302static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 303{
f57e7d5a 304 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
305
306 t->next = *tp;
307 write_lock_bh(&ipgre_lock);
308 *tp = t;
309 write_unlock_bh(&ipgre_lock);
310}
311
f57e7d5a 312static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
313{
314 struct ip_tunnel **tp;
315
f57e7d5a 316 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
317 if (t == *tp) {
318 write_lock_bh(&ipgre_lock);
319 *tp = t->next;
320 write_unlock_bh(&ipgre_lock);
321 break;
322 }
323 }
324}
325
e1a80002
HX
326static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
327 struct ip_tunnel_parm *parms,
328 int type)
1da177e4 329{
d5a0a1e3
AV
330 __be32 remote = parms->iph.daddr;
331 __be32 local = parms->iph.saddr;
332 __be32 key = parms->i_key;
749c10f9 333 int link = parms->link;
e1a80002
HX
334 struct ip_tunnel *t, **tp;
335 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
336
337 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
338 if (local == t->parms.iph.saddr &&
339 remote == t->parms.iph.daddr &&
340 key == t->parms.i_key &&
749c10f9 341 link == t->parms.link &&
e1a80002
HX
342 type == t->dev->type)
343 break;
344
345 return t;
346}
347
348static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
349 struct ip_tunnel_parm *parms, int create)
350{
351 struct ip_tunnel *t, *nt;
1da177e4 352 struct net_device *dev;
1da177e4 353 char name[IFNAMSIZ];
f57e7d5a 354 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 355
e1a80002
HX
356 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
357 if (t || !create)
358 return t;
1da177e4
LT
359
360 if (parms->name[0])
361 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
362 else
363 sprintf(name, "gre%%d");
1da177e4
LT
364
365 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
366 if (!dev)
367 return NULL;
368
0b67eceb
PE
369 dev_net_set(dev, net);
370
b37d428b
PE
371 if (strchr(name, '%')) {
372 if (dev_alloc_name(dev, name) < 0)
373 goto failed_free;
374 }
375
2941a486 376 nt = netdev_priv(dev);
1da177e4 377 nt->parms = *parms;
c19e654d 378 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 379
42aa9162
HX
380 dev->mtu = ipgre_tunnel_bind_dev(dev);
381
b37d428b
PE
382 if (register_netdevice(dev) < 0)
383 goto failed_free;
1da177e4 384
1da177e4 385 dev_hold(dev);
f57e7d5a 386 ipgre_tunnel_link(ign, nt);
1da177e4
LT
387 return nt;
388
b37d428b
PE
389failed_free:
390 free_netdev(dev);
1da177e4
LT
391 return NULL;
392}
393
394static void ipgre_tunnel_uninit(struct net_device *dev)
395{
f57e7d5a
PE
396 struct net *net = dev_net(dev);
397 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
398
399 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
400 dev_put(dev);
401}
402
403
404static void ipgre_err(struct sk_buff *skb, u32 info)
405{
1da177e4 406
071f92d0 407/* All the routers (except for Linux) return only
1da177e4
LT
408 8 bytes of packet payload. It means, that precise relaying of
409 ICMP in the real Internet is absolutely infeasible.
410
411 Moreover, Cisco "wise men" put GRE key to the third word
412 in GRE header. It makes impossible maintaining even soft state for keyed
413 GRE tunnels with enabled checksum. Tell them "thank you".
414
415 Well, I wonder, rfc1812 was written by Cisco employee,
416 what the hell these idiots break standrads established
417 by themself???
418 */
419
6ed2533e 420 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 421 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 422 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
423 const int type = icmp_hdr(skb)->type;
424 const int code = icmp_hdr(skb)->code;
1da177e4 425 struct ip_tunnel *t;
d5a0a1e3 426 __be16 flags;
1da177e4
LT
427
428 flags = p[0];
429 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
430 if (flags&(GRE_VERSION|GRE_ROUTING))
431 return;
432 if (flags&GRE_KEY) {
433 grehlen += 4;
434 if (flags&GRE_CSUM)
435 grehlen += 4;
436 }
437 }
438
439 /* If only 8 bytes returned, keyed message will be dropped here */
440 if (skb_headlen(skb) < grehlen)
441 return;
442
443 switch (type) {
444 default:
445 case ICMP_PARAMETERPROB:
446 return;
447
448 case ICMP_DEST_UNREACH:
449 switch (code) {
450 case ICMP_SR_FAILED:
451 case ICMP_PORT_UNREACH:
452 /* Impossible event. */
453 return;
454 case ICMP_FRAG_NEEDED:
455 /* Soft state for pmtu is maintained by IP core. */
456 return;
457 default:
458 /* All others are translated to HOST_UNREACH.
459 rfc2003 contains "deep thoughts" about NET_UNREACH,
460 I believe they are just ether pollution. --ANK
461 */
462 break;
463 }
464 break;
465 case ICMP_TIME_EXCEEDED:
466 if (code != ICMP_EXC_TTL)
467 return;
468 break;
469 }
470
471 read_lock(&ipgre_lock);
749c10f9 472 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
e1a80002
HX
473 flags & GRE_KEY ?
474 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
475 p[1]);
f97c1e0c
JP
476 if (t == NULL || t->parms.iph.daddr == 0 ||
477 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
478 goto out;
479
480 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
481 goto out;
482
483 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
484 t->err_count++;
485 else
486 t->err_count = 1;
487 t->err_time = jiffies;
488out:
489 read_unlock(&ipgre_lock);
490 return;
1da177e4
LT
491}
492
493static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
494{
495 if (INET_ECN_is_ce(iph->tos)) {
496 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 497 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 498 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 499 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
500 }
501 }
502}
503
504static inline u8
505ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
506{
507 u8 inner = 0;
508 if (skb->protocol == htons(ETH_P_IP))
509 inner = old_iph->tos;
510 else if (skb->protocol == htons(ETH_P_IPV6))
511 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
512 return INET_ECN_encapsulate(tos, inner);
513}
514
/*
 * Receive handler for GRE packets.
 *
 * Parses the GRE header at skb->data (flags, optional checksum, key and
 * sequence number), looks up the matching tunnel under ipgre_lock, strips
 * the GRE header, enforces the tunnel's checksum and sequence policy
 * (parms.i_flags), updates the tunnel device statistics and passes the
 * inner packet to netif_rx().  When no tunnel matches, an ICMP
 * port-unreachable is sent and the skb is freed.
 *
 * Always returns 0; the skb is consumed on every path.
 */
515static int ipgre_rcv(struct sk_buff *skb)
516{
517 struct iphdr *iph;
518 u8 *h;
d5a0a1e3 519 __be16 flags;
d3bc23e7 520 __sum16 csum = 0;
d5a0a1e3 521 __be32 key = 0;
1da177e4
LT
522 u32 seqno = 0;
523 struct ip_tunnel *tunnel;
/* Running length of the GRE header; starts at the 4 mandatory bytes. */
524 int offset = 4;
e1a80002 525 __be16 gre_proto;
64194c31 526 unsigned int len;
1da177e4
LT
527
/* 16 bytes covers the base header plus the three optional 4-byte words read below. */
528 if (!pskb_may_pull(skb, 16))
529 goto drop_nolock;
530
eddc9ec5 531 iph = ip_hdr(skb);
1da177e4 532 h = skb->data;
d5a0a1e3 533 flags = *(__be16*)h;
1da177e4
LT
534
535 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
536 /* - Version must be 0.
537 - We do not support routing headers.
538 */
539 if (flags&(GRE_VERSION|GRE_ROUTING))
540 goto drop_nolock;
541
542 if (flags&GRE_CSUM) {
/* csum ends up zero when the checksum verifies; non-zero is rejected after the tunnel lookup. */
fb286bb2 543 switch (skb->ip_summed) {
84fa7933 544 case CHECKSUM_COMPLETE:
d3bc23e7 545 csum = csum_fold(skb->csum);
fb286bb2
HX
546 if (!csum)
547 break;
548 /* fall through */
549 case CHECKSUM_NONE:
550 skb->csum = 0;
551 csum = __skb_checksum_complete(skb);
84fa7933 552 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
553 }
554 offset += 4;
555 }
556 if (flags&GRE_KEY) {
d5a0a1e3 557 key = *(__be32*)(h + offset);
1da177e4
LT
558 offset += 4;
559 }
560 if (flags&GRE_SEQ) {
d5a0a1e3 561 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
562 offset += 4;
563 }
564 }
565
e1a80002
HX
/* Protocol type is the second 16-bit word of the GRE header. */
566 gre_proto = *(__be16 *)(h + 2);
567
1da177e4 568 read_lock(&ipgre_lock);
/* For a received packet the sender (saddr) is the tunnel's remote end. */
749c10f9 569 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
e1a80002
HX
570 iph->saddr, iph->daddr, key,
571 gre_proto))) {
addd68eb
PE
572 struct net_device_stats *stats = &tunnel->dev->stats;
573
1da177e4
LT
574 secpath_reset(skb);
575
e1a80002 576 skb->protocol = gre_proto;
1da177e4
LT
577 /* WCCP version 1 and 2 protocol decoding.
578 * - Change protocol to IP
579 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
580 */
e1a80002 581 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 582 skb->protocol = htons(ETH_P_IP);
/* If the byte after the GRE header is not an IPv4 version nibble, skip 4 more bytes. */
e905a9ed 583 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
584 offset += 4;
585 }
586
/* Strip the GRE header and remove its bytes from the running checksum. */
1d069167 587 skb->mac_header = skb->network_header;
4209fb60 588 __pskb_pull(skb, offset);
9c70220b 589 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
590 skb->pkt_type = PACKET_HOST;
591#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 592 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 593 /* Looped back packet, drop it! */
ee6b9673 594 if (skb->rtable->fl.iif == 0)
1da177e4 595 goto drop;
addd68eb 596 stats->multicast++;
1da177e4
LT
597 skb->pkt_type = PACKET_BROADCAST;
598 }
599#endif
600
/* Drop on a bad checksum, or when the tunnel requires a checksum and the packet has none. */
601 if (((flags&GRE_CSUM) && csum) ||
602 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
603 stats->rx_crc_errors++;
604 stats->rx_errors++;
1da177e4
LT
605 goto drop;
606 }
607 if (tunnel->parms.i_flags&GRE_SEQ) {
/* Drop when the sequence number is missing or behind the expected one (signed difference). */
608 if (!(flags&GRE_SEQ) ||
609 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
610 stats->rx_fifo_errors++;
611 stats->rx_errors++;
1da177e4
LT
612 goto drop;
613 }
614 tunnel->i_seqno = seqno + 1;
615 }
e1a80002 616
64194c31
HX
/* Sample the length for rx_bytes before eth_type_trans() below can shorten the skb. */
617 len = skb->len;
618
e1a80002
HX
619 /* Warning: All skb pointers will be invalidated! */
620 if (tunnel->dev->type == ARPHRD_ETHER) {
621 if (!pskb_may_pull(skb, ETH_HLEN)) {
622 stats->rx_length_errors++;
623 stats->rx_errors++;
624 goto drop;
625 }
626
/* Re-read the outer IP header pointer after the pull above. */
627 iph = ip_hdr(skb);
628 skb->protocol = eth_type_trans(skb, tunnel->dev);
629 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
630 }
631
addd68eb 632 stats->rx_packets++;
64194c31 633 stats->rx_bytes += len;
1da177e4
LT
/* Hand the packet to the tunnel device and discard outer-packet routing/netfilter state. */
634 skb->dev = tunnel->dev;
635 dst_release(skb->dst);
636 skb->dst = NULL;
637 nf_reset(skb);
e1a80002
HX
638
639 skb_reset_network_header(skb);
1da177e4 640 ipgre_ecn_decapsulate(iph, skb);
e1a80002 641
1da177e4
LT
642 netif_rx(skb);
643 read_unlock(&ipgre_lock);
644 return(0);
645 }
/* No tunnel matched (lock still held): report port unreachable, then drop. */
45af08be 646 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
647
648drop:
649 read_unlock(&ipgre_lock);
650drop_nolock:
651 kfree_skb(skb);
652 return(0);
653}
654
/*
 * Transmit handler of a GRE tunnel device: wrap @skb in an outer IPv4
 * header plus GRE header and send it towards the tunnel endpoint.
 *
 * Outline of the steps below:
 *  - refuse re-entry through the same tunnel (tunnel->recursion);
 *  - choose the outer header template (tiph) and resolve the destination,
 *    deriving it from the inner packet's route/neighbour when the tunnel
 *    has no fixed remote ("NBMA");
 *  - route the outer packet and reject a route back into this device;
 *  - path-MTU handling, including ICMP "too big" replies;
 *  - make sure there is writable headroom, then build the IP and GRE
 *    headers and transmit via IPTUNNEL_XMIT().
 *
 * Always returns 0.  On error the skb is freed and tx_errors (or
 * tx_dropped on allocation failure) is incremented.
 */
655static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
656{
2941a486 657 struct ip_tunnel *tunnel = netdev_priv(dev);
addd68eb 658 struct net_device_stats *stats = &tunnel->dev->stats;
eddc9ec5 659 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
660 struct iphdr *tiph;
661 u8 tos;
d5a0a1e3 662 __be16 df;
1da177e4
LT
663 struct rtable *rt; /* Route to the other host */
664 struct net_device *tdev; /* Device to other host */
665 struct iphdr *iph; /* Our new IP header */
c2636b4d 666 unsigned int max_headroom; /* The extra header space needed */
1da177e4 667 int gre_hlen;
d5a0a1e3 668 __be32 dst;
1da177e4
LT
669 int mtu;
670
/* A non-zero counter means this tunnel's xmit is already active: count a collision and drop. */
671 if (tunnel->recursion++) {
addd68eb 672 stats->collisions++;
1da177e4
LT
673 goto tx_error;
674 }
675
e1a80002
HX
676 if (dev->type == ARPHRD_ETHER)
677 IPCB(skb)->flags = 0;
678
/*
 * With header_ops on an IPGRE device the outer header is taken from
 * skb->data and nothing more is pushed (gre_hlen = 0) - presumably it
 * was built by ipgre_header(); otherwise the configured template is used.
 */
679 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 680 gre_hlen = 0;
6ed2533e 681 tiph = (struct iphdr *)skb->data;
1da177e4
LT
682 } else {
683 gre_hlen = tunnel->hlen;
684 tiph = &tunnel->parms.iph;
685 }
686
687 if ((dst = tiph->daddr) == 0) {
688 /* NBMA tunnel */
689
690 if (skb->dst == NULL) {
addd68eb 691 stats->tx_fifo_errors++;
1da177e4
LT
692 goto tx_error;
693 }
694
/* IPv4 payload: the outer destination is the gateway of the inner packet's route. */
695 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 696 rt = skb->rtable;
1da177e4
LT
697 if ((dst = rt->rt_gateway) == 0)
698 goto tx_error_icmp;
699 }
700#ifdef CONFIG_IPV6
/* IPv6 payload: take the IPv4 address embedded in an IPv4-compatible neighbour or destination address. */
701 else if (skb->protocol == htons(ETH_P_IPV6)) {
702 struct in6_addr *addr6;
703 int addr_type;
704 struct neighbour *neigh = skb->dst->neighbour;
705
706 if (neigh == NULL)
707 goto tx_error;
708
6ed2533e 709 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
710 addr_type = ipv6_addr_type(addr6);
711
712 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 713 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
714 addr_type = ipv6_addr_type(addr6);
715 }
716
717 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
718 goto tx_error_icmp;
719
720 dst = addr6->s6_addr32[3];
721 }
722#endif
723 else
724 goto tx_error;
725 }
726
/* Bit 0 of the template TOS requests copying the TOS of an inner IPv4 packet; the bit itself is cleared. */
727 tos = tiph->tos;
728 if (tos&1) {
729 if (skb->protocol == htons(ETH_P_IP))
730 tos = old_iph->tos;
731 tos &= ~1;
732 }
733
734 {
735 struct flowi fl = { .oif = tunnel->parms.link,
736 .nl_u = { .ip4_u =
737 { .daddr = dst,
738 .saddr = tiph->saddr,
739 .tos = RT_TOS(tos) } },
740 .proto = IPPROTO_GRE };
96635522 741 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 742 stats->tx_carrier_errors++;
1da177e4
LT
743 goto tx_error;
744 }
745 }
746 tdev = rt->u.dst.dev;
747
/* The outer route leads back into this tunnel device: drop instead of looping. */
748 if (tdev == dev) {
749 ip_rt_put(rt);
addd68eb 750 stats->collisions++;
1da177e4
LT
751 goto tx_error;
752 }
753
/*
 * With DF set in the template the usable MTU is the outer route's MTU
 * minus this device's header lengths; otherwise use the inner route's
 * MTU, or the device MTU when the skb has no route.
 */
754 df = tiph->frag_off;
755 if (df)
c95b819a 756 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4
LT
757 else
758 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
759
760 if (skb->dst)
761 skb->dst->ops->update_pmtu(skb->dst, mtu);
762
763 if (skb->protocol == htons(ETH_P_IP)) {
/* Carry the inner packet's DF bit over to the outer header. */
764 df |= (old_iph->frag_off&htons(IP_DF));
765
766 if ((old_iph->frag_off&htons(IP_DF)) &&
767 mtu < ntohs(old_iph->tot_len)) {
768 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
769 ip_rt_put(rt);
770 goto tx_error;
771 }
772 }
773#ifdef CONFIG_IPV6
774 else if (skb->protocol == htons(ETH_P_IPV6)) {
6ed2533e 775 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
1da177e4
LT
776
/* Lower the MTU metric stored on the inner IPv6 route when the tunnel MTU is smaller. */
777 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
778 if ((tunnel->parms.iph.daddr &&
779 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
780 rt6->rt6i_dst.plen == 128) {
781 rt6->rt6i_flags |= RTF_MODIFIED;
782 skb->dst->metrics[RTAX_MTU-1] = mtu;
783 }
784 }
785
786 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
787 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
788 ip_rt_put(rt);
789 goto tx_error;
790 }
791 }
792#endif
793
/*
 * err_count/err_time are maintained by ipgre_err(): while recent errors
 * are pending, signal a link failure for this packet and consume one.
 */
794 if (tunnel->err_count > 0) {
795 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
796 tunnel->err_count--;
797
798 dst_link_failure(skb);
799 } else
800 tunnel->err_count = 0;
801 }
802
803 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
804
cfbba49d
PM
/* Reallocate when there is too little headroom or the buffer is shared / not writable. */
805 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
806 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
807 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
808 if (!new_skb) {
809 ip_rt_put(rt);
e905a9ed 810 stats->tx_dropped++;
1da177e4
LT
811 dev_kfree_skb(skb);
812 tunnel->recursion--;
813 return 0;
814 }
815 if (skb->sk)
816 skb_set_owner_w(new_skb, skb->sk);
817 dev_kfree_skb(skb);
818 skb = new_skb;
/* The inner header now lives in the new buffer. */
eddc9ec5 819 old_iph = ip_hdr(skb);
1da177e4
LT
820 }
821
/* The old data start becomes the transport header; the pushed area becomes the new network header. */
64194c31 822 skb_reset_transport_header(skb);
e2d1bca7
ACM
823 skb_push(skb, gre_hlen);
824 skb_reset_network_header(skb);
1da177e4 825 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
826 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
827 IPSKB_REROUTED);
1da177e4
LT
/* Replace the inner route with the outer one; the skb now owns the rt reference. */
828 dst_release(skb->dst);
829 skb->dst = &rt->u.dst;
830
831 /*
832 * Push down and install the IPIP header.
833 */
834
eddc9ec5 835 iph = ip_hdr(skb);
1da177e4
LT
836 iph->version = 4;
837 iph->ihl = sizeof(struct iphdr) >> 2;
838 iph->frag_off = df;
839 iph->protocol = IPPROTO_GRE;
840 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
841 iph->daddr = rt->rt_dst;
842 iph->saddr = rt->rt_src;
843
/* A template TTL of 0 means inherit: from the inner IPv4/IPv6 header, else from the route's hop limit. */
844 if ((iph->ttl = tiph->ttl) == 0) {
845 if (skb->protocol == htons(ETH_P_IP))
846 iph->ttl = old_iph->ttl;
847#ifdef CONFIG_IPV6
848 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 849 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
850#endif
851 else
852 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
853 }
854
e1a80002
HX
/* Base GRE header: flags word, then protocol type (ETH_P_TEB for Ethernet-type tunnel devices). */
855 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
856 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
857 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
858
859 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
/* ptr starts at the last option word and walks backwards: sequence, key, checksum. */
d5a0a1e3 860 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
861
862 if (tunnel->parms.o_flags&GRE_SEQ) {
863 ++tunnel->o_seqno;
864 *ptr = htonl(tunnel->o_seqno);
865 ptr--;
866 }
867 if (tunnel->parms.o_flags&GRE_KEY) {
868 *ptr = tunnel->parms.o_key;
869 ptr--;
870 }
871 if (tunnel->parms.o_flags&GRE_CSUM) {
/* Zero the checksum word, then checksum everything after the IP header. */
872 *ptr = 0;
5f92a738 873 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
874 }
875 }
876
877 nf_reset(skb);
878
/* IPTUNNEL_XMIT() is a macro defined outside this file; presumably it sends the packet and updates tx stats - confirm. */
879 IPTUNNEL_XMIT();
880 tunnel->recursion--;
881 return 0;
882
883tx_error_icmp:
884 dst_link_failure(skb);
885
886tx_error:
887 stats->tx_errors++;
888 dev_kfree_skb(skb);
889 tunnel->recursion--;
890 return 0;
891}
892
42aa9162 893static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
894{
895 struct net_device *tdev = NULL;
896 struct ip_tunnel *tunnel;
897 struct iphdr *iph;
898 int hlen = LL_MAX_HEADER;
899 int mtu = ETH_DATA_LEN;
900 int addend = sizeof(struct iphdr) + 4;
901
902 tunnel = netdev_priv(dev);
903 iph = &tunnel->parms.iph;
904
c95b819a 905 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
906
907 if (iph->daddr) {
908 struct flowi fl = { .oif = tunnel->parms.link,
909 .nl_u = { .ip4_u =
910 { .daddr = iph->daddr,
911 .saddr = iph->saddr,
912 .tos = RT_TOS(iph->tos) } },
913 .proto = IPPROTO_GRE };
914 struct rtable *rt;
96635522 915 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
916 tdev = rt->u.dst.dev;
917 ip_rt_put(rt);
918 }
e1a80002
HX
919
920 if (dev->type != ARPHRD_ETHER)
921 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
922 }
923
924 if (!tdev && tunnel->parms.link)
96635522 925 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
926
927 if (tdev) {
c95b819a 928 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
929 mtu = tdev->mtu;
930 }
931 dev->iflink = tunnel->parms.link;
932
933 /* Precalculate GRE options length */
934 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
935 if (tunnel->parms.o_flags&GRE_CSUM)
936 addend += 4;
937 if (tunnel->parms.o_flags&GRE_KEY)
938 addend += 4;
939 if (tunnel->parms.o_flags&GRE_SEQ)
940 addend += 4;
941 }
c95b819a 942 dev->needed_headroom = addend + hlen;
42aa9162
HX
943 mtu -= dev->hard_header_len - addend;
944
945 if (mtu < 68)
946 mtu = 68;
947
ee34c1eb
MS
948 tunnel->hlen = addend;
949
42aa9162 950 return mtu;
ee34c1eb
MS
951}
952
1da177e4
LT
/*
 * Tunnel configuration ioctls.  The user buffer at
 * ifr->ifr_ifru.ifru_data holds a struct ip_tunnel_parm.
 *
 *  SIOCGETTUNNEL  copy a tunnel's parameters to user space.  On the
 *                 fallback device the tunnel is looked up from the
 *                 user-supplied parameters; otherwise (or when not found)
 *                 the device's own tunnel is reported.
 *  SIOCADDTUNNEL  create a tunnel (CAP_NET_ADMIN).
 *  SIOCCHGTUNNEL  change a tunnel's parameters (CAP_NET_ADMIN).
 *  SIOCDELTUNNEL  unregister a tunnel device (CAP_NET_ADMIN); the
 *                 fallback tunnel itself cannot be deleted this way.
 *
 * Returns 0 on success or a negative errno.
 */
953static int
954ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
955{
956 int err = 0;
957 struct ip_tunnel_parm p;
958 struct ip_tunnel *t;
f57e7d5a
PE
959 struct net *net = dev_net(dev);
960 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
961
962 switch (cmd) {
963 case SIOCGETTUNNEL:
964 t = NULL;
7daa0004 965 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
966 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
967 err = -EFAULT;
968 break;
969 }
f57e7d5a 970 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
971 }
972 if (t == NULL)
2941a486 973 t = netdev_priv(dev);
1da177e4
LT
974 memcpy(&p, &t->parms, sizeof(p));
975 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
976 err = -EFAULT;
977 break;
978
979 case SIOCADDTUNNEL:
980 case SIOCCHGTUNNEL:
981 err = -EPERM;
982 if (!capable(CAP_NET_ADMIN))
983 goto done;
984
985 err = -EFAULT;
986 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
987 goto done;
988
/* Accept only a plain IPv4/GRE header template: no IP options, no frag bits other than DF, GRE version 0, no routing. */
989 err = -EINVAL;
990 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
991 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
992 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
993 goto done;
/* A fixed TTL forces DF on the outer header. */
994 if (p.iph.ttl)
995 p.iph.frag_off |= htons(IP_DF);
996
/* Ignore key values whose corresponding GRE_KEY flag is not set. */
997 if (!(p.i_flags&GRE_KEY))
998 p.i_key = 0;
999 if (!(p.o_flags&GRE_KEY))
1000 p.o_key = 0;
1001
f57e7d5a 1002 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 1003
7daa0004 1004 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
1005 if (t != NULL) {
/* The requested parameters already belong to a different device. */
1006 if (t->dev != dev) {
1007 err = -EEXIST;
1008 break;
1009 }
1010 } else {
6ed2533e 1011 unsigned nflags = 0;
1da177e4 1012
2941a486 1013 t = netdev_priv(dev);
1da177e4 1014
f97c1e0c 1015 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
1016 nflags = IFF_BROADCAST;
1017 else if (p.iph.daddr)
1018 nflags = IFF_POINTOPOINT;
1019
/* The change must not alter the device's point-to-point / broadcast nature. */
1020 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1021 err = -EINVAL;
1022 break;
1023 }
/* Addresses and input key determine the hash bucket, so unlink, update, then relink. */
f57e7d5a 1024 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
1025 t->parms.iph.saddr = p.iph.saddr;
1026 t->parms.iph.daddr = p.iph.daddr;
1027 t->parms.i_key = p.i_key;
1028 t->parms.o_key = p.o_key;
1029 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1030 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 1031 ipgre_tunnel_link(ign, t);
1da177e4
LT
1032 netdev_state_change(dev);
1033 }
1034 }
1035
1036 if (t) {
1037 err = 0;
1038 if (cmd == SIOCCHGTUNNEL) {
1039 t->parms.iph.ttl = p.iph.ttl;
1040 t->parms.iph.tos = p.iph.tos;
1041 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
/*
 * NOTE(review): when the ioctl is issued on the fallback device, t can
 * be a tunnel of another device while dev is still the fallback device;
 * the rebind and MTU update below act on dev, not t->dev - confirm this
 * is intended.
 */
1042 if (t->parms.link != p.link) {
1043 t->parms.link = p.link;
42aa9162 1044 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
1045 netdev_state_change(dev);
1046 }
1da177e4
LT
1047 }
/* Report the resulting parameters back to user space. */
1048 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1049 err = -EFAULT;
1050 } else
1051 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1052 break;
1053
1054 case SIOCDELTUNNEL:
1055 err = -EPERM;
1056 if (!capable(CAP_NET_ADMIN))
1057 goto done;
1058
/* On the fallback device the tunnel to delete is named by the user-supplied parameters. */
7daa0004 1059 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1060 err = -EFAULT;
1061 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1062 goto done;
1063 err = -ENOENT;
f57e7d5a 1064 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1065 goto done;
1066 err = -EPERM;
7daa0004 1067 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1068 goto done;
1069 dev = t->dev;
1070 }
22f8cde5
SH
1071 unregister_netdevice(dev);
1072 err = 0;
1da177e4
LT
1073 break;
1074
1075 default:
1076 err = -EINVAL;
1077 }
1078
1079done:
1080 return err;
1081}
1082
1da177e4
LT
1083static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1084{
2941a486 1085 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1086 if (new_mtu < 68 ||
1087 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1088 return -EINVAL;
1089 dev->mtu = new_mtu;
1090 return 0;
1091}
1092
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1121
3b04ddde
SH
1122static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1123 unsigned short type,
1124 const void *daddr, const void *saddr, unsigned len)
1da177e4 1125{
2941a486 1126 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1127 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1128 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1129
1130 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1131 p[0] = t->parms.o_flags;
1132 p[1] = htons(type);
1133
1134 /*
e905a9ed 1135 * Set the source hardware address.
1da177e4 1136 */
e905a9ed 1137
1da177e4
LT
1138 if (saddr)
1139 memcpy(&iph->saddr, saddr, 4);
1140
1141 if (daddr) {
1142 memcpy(&iph->daddr, daddr, 4);
1143 return t->hlen;
1144 }
f97c1e0c 1145 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1146 return t->hlen;
e905a9ed 1147
1da177e4
LT
1148 return -t->hlen;
1149}
1150
6a5f44d7
TT
1151static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1152{
6ed2533e 1153 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1154 memcpy(haddr, &iph->saddr, 4);
1155 return 4;
1156}
1157
3b04ddde
SH
1158static const struct header_ops ipgre_header_ops = {
1159 .create = ipgre_header,
6a5f44d7 1160 .parse = ipgre_header_parse,
3b04ddde
SH
1161};
1162
6a5f44d7 1163#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1164static int ipgre_open(struct net_device *dev)
1165{
2941a486 1166 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1167
f97c1e0c 1168 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1169 struct flowi fl = { .oif = t->parms.link,
1170 .nl_u = { .ip4_u =
1171 { .daddr = t->parms.iph.daddr,
1172 .saddr = t->parms.iph.saddr,
1173 .tos = RT_TOS(t->parms.iph.tos) } },
1174 .proto = IPPROTO_GRE };
1175 struct rtable *rt;
96635522 1176 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1177 return -EADDRNOTAVAIL;
1178 dev = rt->u.dst.dev;
1179 ip_rt_put(rt);
e5ed6399 1180 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1181 return -EADDRNOTAVAIL;
1182 t->mlink = dev->ifindex;
e5ed6399 1183 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1184 }
1185 return 0;
1186}
1187
1188static int ipgre_close(struct net_device *dev)
1189{
2941a486 1190 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1191
f97c1e0c 1192 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1193 struct in_device *in_dev;
c346dca1 1194 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1195 if (in_dev) {
1196 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1197 in_dev_put(in_dev);
1198 }
1199 }
1200 return 0;
1201}
1202
1203#endif
1204
b8c26a33
SH
1205static const struct net_device_ops ipgre_netdev_ops = {
1206 .ndo_init = ipgre_tunnel_init,
1207 .ndo_uninit = ipgre_tunnel_uninit,
1208#ifdef CONFIG_NET_IPGRE_BROADCAST
1209 .ndo_open = ipgre_open,
1210 .ndo_stop = ipgre_close,
1211#endif
1212 .ndo_start_xmit = ipgre_tunnel_xmit,
1213 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1214 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1215};
1216
1da177e4
LT
1217static void ipgre_tunnel_setup(struct net_device *dev)
1218{
b8c26a33 1219 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1220 dev->destructor = free_netdev;
1da177e4
LT
1221
1222 dev->type = ARPHRD_IPGRE;
c95b819a 1223 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1224 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1225 dev->flags = IFF_NOARP;
1226 dev->iflink = 0;
1227 dev->addr_len = 4;
0b67eceb 1228 dev->features |= NETIF_F_NETNS_LOCAL;
1da177e4
LT
1229}
1230
1231static int ipgre_tunnel_init(struct net_device *dev)
1232{
1da177e4
LT
1233 struct ip_tunnel *tunnel;
1234 struct iphdr *iph;
1da177e4 1235
2941a486 1236 tunnel = netdev_priv(dev);
1da177e4
LT
1237 iph = &tunnel->parms.iph;
1238
1239 tunnel->dev = dev;
1240 strcpy(tunnel->parms.name, dev->name);
1241
1242 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1243 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1244
1da177e4 1245 if (iph->daddr) {
1da177e4 1246#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1247 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1248 if (!iph->saddr)
1249 return -EINVAL;
1250 dev->flags = IFF_BROADCAST;
3b04ddde 1251 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1252 }
1253#endif
ee34c1eb 1254 } else
6a5f44d7 1255 dev->header_ops = &ipgre_header_ops;
1da177e4 1256
1da177e4
LT
1257 return 0;
1258}
1259
b8c26a33 1260static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1261{
2941a486 1262 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1263 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1264 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1265
1266 tunnel->dev = dev;
1267 strcpy(tunnel->parms.name, dev->name);
1268
1269 iph->version = 4;
1270 iph->protocol = IPPROTO_GRE;
1271 iph->ihl = 5;
1272 tunnel->hlen = sizeof(struct iphdr) + 4;
1273
1274 dev_hold(dev);
eb8ce741 1275 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1276}
1277
1278
1279static struct net_protocol ipgre_protocol = {
1280 .handler = ipgre_rcv,
1281 .err_handler = ipgre_err,
f96c148f 1282 .netns_ok = 1,
1da177e4
LT
1283};
1284
eb8ce741
PE
1285static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1286{
1287 int prio;
1288
1289 for (prio = 0; prio < 4; prio++) {
1290 int h;
1291 for (h = 0; h < HASH_SIZE; h++) {
1292 struct ip_tunnel *t;
1293 while ((t = ign->tunnels[prio][h]) != NULL)
1294 unregister_netdevice(t->dev);
1295 }
1296 }
1297}
1298
59a4c759
PE
1299static int ipgre_init_net(struct net *net)
1300{
1301 int err;
1302 struct ipgre_net *ign;
1303
1304 err = -ENOMEM;
eb8ce741 1305 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1306 if (ign == NULL)
1307 goto err_alloc;
1308
1309 err = net_assign_generic(net, ipgre_net_id, ign);
1310 if (err < 0)
1311 goto err_assign;
1312
7daa0004
PE
1313 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1314 ipgre_tunnel_setup);
1315 if (!ign->fb_tunnel_dev) {
1316 err = -ENOMEM;
1317 goto err_alloc_dev;
1318 }
be77e593 1319 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1320
b8c26a33 1321 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1322 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1323
1324 if ((err = register_netdev(ign->fb_tunnel_dev)))
1325 goto err_reg_dev;
1326
59a4c759
PE
1327 return 0;
1328
7daa0004
PE
1329err_reg_dev:
1330 free_netdev(ign->fb_tunnel_dev);
1331err_alloc_dev:
1332 /* nothing */
59a4c759
PE
1333err_assign:
1334 kfree(ign);
1335err_alloc:
1336 return err;
1337}
1338
1339static void ipgre_exit_net(struct net *net)
1340{
1341 struct ipgre_net *ign;
1342
1343 ign = net_generic(net, ipgre_net_id);
7daa0004 1344 rtnl_lock();
eb8ce741 1345 ipgre_destroy_tunnels(ign);
7daa0004 1346 rtnl_unlock();
59a4c759
PE
1347 kfree(ign);
1348}
1349
1350static struct pernet_operations ipgre_net_ops = {
1351 .init = ipgre_init_net,
1352 .exit = ipgre_exit_net,
1353};
1da177e4 1354
c19e654d
HX
1355static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1356{
1357 __be16 flags;
1358
1359 if (!data)
1360 return 0;
1361
1362 flags = 0;
1363 if (data[IFLA_GRE_IFLAGS])
1364 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1365 if (data[IFLA_GRE_OFLAGS])
1366 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367 if (flags & (GRE_VERSION|GRE_ROUTING))
1368 return -EINVAL;
1369
1370 return 0;
1371}
1372
e1a80002
HX
1373static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1374{
1375 __be32 daddr;
1376
1377 if (tb[IFLA_ADDRESS]) {
1378 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1379 return -EINVAL;
1380 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1381 return -EADDRNOTAVAIL;
1382 }
1383
1384 if (!data)
1385 goto out;
1386
1387 if (data[IFLA_GRE_REMOTE]) {
1388 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1389 if (!daddr)
1390 return -EINVAL;
1391 }
1392
1393out:
1394 return ipgre_tunnel_validate(tb, data);
1395}
1396
c19e654d
HX
1397static void ipgre_netlink_parms(struct nlattr *data[],
1398 struct ip_tunnel_parm *parms)
1399{
7bb82d92 1400 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1401
1402 parms->iph.protocol = IPPROTO_GRE;
1403
1404 if (!data)
1405 return;
1406
1407 if (data[IFLA_GRE_LINK])
1408 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1409
1410 if (data[IFLA_GRE_IFLAGS])
1411 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1412
1413 if (data[IFLA_GRE_OFLAGS])
1414 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1415
1416 if (data[IFLA_GRE_IKEY])
1417 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1418
1419 if (data[IFLA_GRE_OKEY])
1420 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1421
1422 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1423 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1424
1425 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1426 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1427
1428 if (data[IFLA_GRE_TTL])
1429 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1430
1431 if (data[IFLA_GRE_TOS])
1432 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1433
1434 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1435 parms->iph.frag_off = htons(IP_DF);
1436}
1437
e1a80002
HX
1438static int ipgre_tap_init(struct net_device *dev)
1439{
1440 struct ip_tunnel *tunnel;
1441
1442 tunnel = netdev_priv(dev);
1443
1444 tunnel->dev = dev;
1445 strcpy(tunnel->parms.name, dev->name);
1446
1447 ipgre_tunnel_bind_dev(dev);
1448
1449 return 0;
1450}
1451
b8c26a33
SH
1452static const struct net_device_ops ipgre_tap_netdev_ops = {
1453 .ndo_init = ipgre_tap_init,
1454 .ndo_uninit = ipgre_tunnel_uninit,
1455 .ndo_start_xmit = ipgre_tunnel_xmit,
1456 .ndo_set_mac_address = eth_mac_addr,
1457 .ndo_validate_addr = eth_validate_addr,
1458 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1459};
1460
e1a80002
HX
1461static void ipgre_tap_setup(struct net_device *dev)
1462{
1463
1464 ether_setup(dev);
1465
b8c26a33 1466 dev->netdev_ops = &ipgre_netdev_ops;
e1a80002 1467 dev->destructor = free_netdev;
e1a80002
HX
1468
1469 dev->iflink = 0;
1470 dev->features |= NETIF_F_NETNS_LOCAL;
1471}
1472
c19e654d
HX
1473static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1474 struct nlattr *data[])
1475{
1476 struct ip_tunnel *nt;
1477 struct net *net = dev_net(dev);
1478 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1479 int mtu;
1480 int err;
1481
1482 nt = netdev_priv(dev);
1483 ipgre_netlink_parms(data, &nt->parms);
1484
e1a80002 1485 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1486 return -EEXIST;
1487
e1a80002
HX
1488 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1489 random_ether_addr(dev->dev_addr);
1490
c19e654d
HX
1491 mtu = ipgre_tunnel_bind_dev(dev);
1492 if (!tb[IFLA_MTU])
1493 dev->mtu = mtu;
1494
1495 err = register_netdevice(dev);
1496 if (err)
1497 goto out;
1498
1499 dev_hold(dev);
1500 ipgre_tunnel_link(ign, nt);
1501
1502out:
1503 return err;
1504}
1505
1506static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1507 struct nlattr *data[])
1508{
1509 struct ip_tunnel *t, *nt;
1510 struct net *net = dev_net(dev);
1511 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1512 struct ip_tunnel_parm p;
1513 int mtu;
1514
1515 if (dev == ign->fb_tunnel_dev)
1516 return -EINVAL;
1517
1518 nt = netdev_priv(dev);
1519 ipgre_netlink_parms(data, &p);
1520
1521 t = ipgre_tunnel_locate(net, &p, 0);
1522
1523 if (t) {
1524 if (t->dev != dev)
1525 return -EEXIST;
1526 } else {
1527 unsigned nflags = 0;
1528
1529 t = nt;
1530
1531 if (ipv4_is_multicast(p.iph.daddr))
1532 nflags = IFF_BROADCAST;
1533 else if (p.iph.daddr)
1534 nflags = IFF_POINTOPOINT;
1535
1536 if ((dev->flags ^ nflags) &
1537 (IFF_POINTOPOINT | IFF_BROADCAST))
1538 return -EINVAL;
1539
1540 ipgre_tunnel_unlink(ign, t);
1541 t->parms.iph.saddr = p.iph.saddr;
1542 t->parms.iph.daddr = p.iph.daddr;
1543 t->parms.i_key = p.i_key;
1544 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1545 memcpy(dev->broadcast, &p.iph.daddr, 4);
1546 ipgre_tunnel_link(ign, t);
1547 netdev_state_change(dev);
1548 }
1549
1550 t->parms.o_key = p.o_key;
1551 t->parms.iph.ttl = p.iph.ttl;
1552 t->parms.iph.tos = p.iph.tos;
1553 t->parms.iph.frag_off = p.iph.frag_off;
1554
1555 if (t->parms.link != p.link) {
1556 t->parms.link = p.link;
1557 mtu = ipgre_tunnel_bind_dev(dev);
1558 if (!tb[IFLA_MTU])
1559 dev->mtu = mtu;
1560 netdev_state_change(dev);
1561 }
1562
1563 return 0;
1564}
1565
1566static size_t ipgre_get_size(const struct net_device *dev)
1567{
1568 return
1569 /* IFLA_GRE_LINK */
1570 nla_total_size(4) +
1571 /* IFLA_GRE_IFLAGS */
1572 nla_total_size(2) +
1573 /* IFLA_GRE_OFLAGS */
1574 nla_total_size(2) +
1575 /* IFLA_GRE_IKEY */
1576 nla_total_size(4) +
1577 /* IFLA_GRE_OKEY */
1578 nla_total_size(4) +
1579 /* IFLA_GRE_LOCAL */
1580 nla_total_size(4) +
1581 /* IFLA_GRE_REMOTE */
1582 nla_total_size(4) +
1583 /* IFLA_GRE_TTL */
1584 nla_total_size(1) +
1585 /* IFLA_GRE_TOS */
1586 nla_total_size(1) +
1587 /* IFLA_GRE_PMTUDISC */
1588 nla_total_size(1) +
1589 0;
1590}
1591
1592static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1593{
1594 struct ip_tunnel *t = netdev_priv(dev);
1595 struct ip_tunnel_parm *p = &t->parms;
1596
1597 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1598 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1599 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1600 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1601 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1602 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1603 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1604 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1605 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1606 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1607
1608 return 0;
1609
1610nla_put_failure:
1611 return -EMSGSIZE;
1612}
1613
1614static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1615 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1616 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1617 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1618 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1619 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1620 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1621 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1622 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1623 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1624 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1625};
1626
1627static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1628 .kind = "gre",
1629 .maxtype = IFLA_GRE_MAX,
1630 .policy = ipgre_policy,
1631 .priv_size = sizeof(struct ip_tunnel),
1632 .setup = ipgre_tunnel_setup,
1633 .validate = ipgre_tunnel_validate,
1634 .newlink = ipgre_newlink,
1635 .changelink = ipgre_changelink,
1636 .get_size = ipgre_get_size,
1637 .fill_info = ipgre_fill_info,
1638};
1639
e1a80002
HX
1640static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1641 .kind = "gretap",
1642 .maxtype = IFLA_GRE_MAX,
1643 .policy = ipgre_policy,
1644 .priv_size = sizeof(struct ip_tunnel),
1645 .setup = ipgre_tap_setup,
1646 .validate = ipgre_tap_validate,
1647 .newlink = ipgre_newlink,
1648 .changelink = ipgre_changelink,
1649 .get_size = ipgre_get_size,
1650 .fill_info = ipgre_fill_info,
1651};
1652
1da177e4
LT
1653/*
1654 * And now the modules code and kernel interface.
1655 */
1656
1657static int __init ipgre_init(void)
1658{
1659 int err;
1660
1661 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1662
1663 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1664 printk(KERN_INFO "ipgre init: can't add protocol\n");
1665 return -EAGAIN;
1666 }
1667
59a4c759
PE
1668 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1669 if (err < 0)
c19e654d 1670 goto gen_device_failed;
7daa0004 1671
c19e654d
HX
1672 err = rtnl_link_register(&ipgre_link_ops);
1673 if (err < 0)
1674 goto rtnl_link_failed;
1675
e1a80002
HX
1676 err = rtnl_link_register(&ipgre_tap_ops);
1677 if (err < 0)
1678 goto tap_ops_failed;
1679
c19e654d 1680out:
1da177e4 1681 return err;
c19e654d 1682
e1a80002
HX
1683tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops);
c19e654d
HX
1685rtnl_link_failed:
1686 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1687gen_device_failed:
1688 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1689 goto out;
1da177e4
LT
1690}
1691
db44575f 1692static void __exit ipgre_fini(void)
1da177e4 1693{
e1a80002 1694 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d
HX
1695 rtnl_link_unregister(&ipgre_link_ops);
1696 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1697 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1698 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1da177e4
LT
1699}
1700
1701module_init(ipgre_init);
1702module_exit(ipgre_fini);
1703MODULE_LICENSE("GPL");
4d74f8ba
PM
1704MODULE_ALIAS_RTNL_LINK("gre");
1705MODULE_ALIAS_RTNL_LINK("gretap");