]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
gre: Add Transparent Ethernet Bridging
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
e905a9ed 69 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
c19e654d 122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 125static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
129static int ipgre_fb_tunnel_init(struct net_device *dev);
130
/* Number of buckets in each tunnel hash table. */
#define HASH_SIZE 16

/* Identifier passed to net_generic() to fetch this module's per-net state. */
static int ipgre_net_id;

/* GRE state kept per network namespace. */
struct ipgre_net {
	/*
	 * Four hash tables of configured tunnels.  The first index is the
	 * table selected in __ipgre_bucket() (bit 0: local address set,
	 * bit 1: unicast remote address set); the second is the hash bucket.
	 */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Fallback device, used by lookup when no configured tunnel matches. */
	struct net_device *fb_tunnel_dev;
};
139
1da177e4
LT
140/* Tunnel hash table */
141
142/*
143 4 hash tables:
144
145 3: (remote,local)
146 2: (remote,*)
147 1: (*,local)
148 0: (*,*)
149
150 We require exact key match i.e. if a key is present in packet
151 it will match only tunnel with the same key; if it is not present,
152 it will match only keyless tunnel.
153
154 All keyless packets, if not matched by configured keyless tunnels,
155 will match fallback tunnel.
156 */
157
/*
 * Fold a 32-bit big-endian value (an address or a GRE key) into a bucket
 * index in the range [0, HASH_SIZE).  HASH_SIZE must be a power of two for
 * the mask to be valid.  The argument is fully parenthesized so that any
 * expression may be passed, but note that it is evaluated twice.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Names for the four per-namespace tables, from most to least specific. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*) */
#define tunnels_l	tunnels[1]	/* (*,local) */
#define tunnels_wc	tunnels[0]	/* (*,*) */

/* Protects the tunnel hash chains against concurrent lookup and update. */
static DEFINE_RWLOCK(ipgre_lock);
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
f57e7d5a 169static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
e1a80002
HX
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
1da177e4
LT
172{
173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
175 struct ip_tunnel *t;
e1a80002 176 struct ip_tunnel *t2 = NULL;
7daa0004 177 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
178 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
179 ARPHRD_ETHER : ARPHRD_IPGRE;
1da177e4 180
eb8ce741 181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
1da177e4 182 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
e1a80002
HX
183 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
184 if (t->dev->type == dev_type)
185 return t;
186 if (t->dev->type == ARPHRD_IPGRE && !t2)
187 t2 = t;
188 }
1da177e4
LT
189 }
190 }
e1a80002 191
eb8ce741 192 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
1da177e4 193 if (remote == t->parms.iph.daddr) {
e1a80002
HX
194 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
195 if (t->dev->type == dev_type)
196 return t;
197 if (t->dev->type == ARPHRD_IPGRE && !t2)
198 t2 = t;
199 }
1da177e4
LT
200 }
201 }
e1a80002 202
eb8ce741 203 for (t = ign->tunnels_l[h1]; t; t = t->next) {
1da177e4 204 if (local == t->parms.iph.saddr ||
f97c1e0c
JP
205 (local == t->parms.iph.daddr &&
206 ipv4_is_multicast(local))) {
e1a80002
HX
207 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
208 if (t->dev->type == dev_type)
209 return t;
210 if (t->dev->type == ARPHRD_IPGRE && !t2)
211 t2 = t;
212 }
1da177e4
LT
213 }
214 }
e1a80002 215
eb8ce741 216 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
e1a80002
HX
217 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
218 if (t->dev->type == dev_type)
219 return t;
220 if (t->dev->type == ARPHRD_IPGRE && !t2)
221 t2 = t;
222 }
1da177e4
LT
223 }
224
e1a80002
HX
225 if (t2)
226 return t2;
227
7daa0004
PE
228 if (ign->fb_tunnel_dev->flags&IFF_UP)
229 return netdev_priv(ign->fb_tunnel_dev);
1da177e4
LT
230 return NULL;
231}
232
f57e7d5a
PE
233static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
234 struct ip_tunnel_parm *parms)
1da177e4 235{
5056a1ef
YH
236 __be32 remote = parms->iph.daddr;
237 __be32 local = parms->iph.saddr;
238 __be32 key = parms->i_key;
1da177e4
LT
239 unsigned h = HASH(key);
240 int prio = 0;
241
242 if (local)
243 prio |= 1;
f97c1e0c 244 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
245 prio |= 2;
246 h ^= HASH(remote);
247 }
248
eb8ce741 249 return &ign->tunnels[prio][h];
1da177e4
LT
250}
251
f57e7d5a
PE
252static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
253 struct ip_tunnel *t)
5056a1ef 254{
f57e7d5a 255 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
256}
257
f57e7d5a 258static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 259{
f57e7d5a 260 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
261
262 t->next = *tp;
263 write_lock_bh(&ipgre_lock);
264 *tp = t;
265 write_unlock_bh(&ipgre_lock);
266}
267
f57e7d5a 268static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
269{
270 struct ip_tunnel **tp;
271
f57e7d5a 272 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
273 if (t == *tp) {
274 write_lock_bh(&ipgre_lock);
275 *tp = t->next;
276 write_unlock_bh(&ipgre_lock);
277 break;
278 }
279 }
280}
281
e1a80002
HX
282static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
283 struct ip_tunnel_parm *parms,
284 int type)
1da177e4 285{
d5a0a1e3
AV
286 __be32 remote = parms->iph.daddr;
287 __be32 local = parms->iph.saddr;
288 __be32 key = parms->i_key;
e1a80002
HX
289 struct ip_tunnel *t, **tp;
290 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
291
292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
293 if (local == t->parms.iph.saddr &&
294 remote == t->parms.iph.daddr &&
295 key == t->parms.i_key &&
296 type == t->dev->type)
297 break;
298
299 return t;
300}
301
302static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
303 struct ip_tunnel_parm *parms, int create)
304{
305 struct ip_tunnel *t, *nt;
1da177e4 306 struct net_device *dev;
1da177e4 307 char name[IFNAMSIZ];
f57e7d5a 308 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 309
e1a80002
HX
310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
311 if (t || !create)
312 return t;
1da177e4
LT
313
314 if (parms->name[0])
315 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
316 else
317 sprintf(name, "gre%%d");
1da177e4
LT
318
319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
320 if (!dev)
321 return NULL;
322
0b67eceb
PE
323 dev_net_set(dev, net);
324
b37d428b
PE
325 if (strchr(name, '%')) {
326 if (dev_alloc_name(dev, name) < 0)
327 goto failed_free;
328 }
329
2941a486 330 nt = netdev_priv(dev);
1da177e4 331 nt->parms = *parms;
c19e654d 332 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 333
42aa9162
HX
334 dev->mtu = ipgre_tunnel_bind_dev(dev);
335
b37d428b
PE
336 if (register_netdevice(dev) < 0)
337 goto failed_free;
1da177e4 338
1da177e4 339 dev_hold(dev);
f57e7d5a 340 ipgre_tunnel_link(ign, nt);
1da177e4
LT
341 return nt;
342
b37d428b
PE
343failed_free:
344 free_netdev(dev);
1da177e4
LT
345 return NULL;
346}
347
348static void ipgre_tunnel_uninit(struct net_device *dev)
349{
f57e7d5a
PE
350 struct net *net = dev_net(dev);
351 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
352
353 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
354 dev_put(dev);
355}
356
357
358static void ipgre_err(struct sk_buff *skb, u32 info)
359{
1da177e4 360
071f92d0 361/* All the routers (except for Linux) return only
1da177e4
LT
362 8 bytes of packet payload. It means, that precise relaying of
363 ICMP in the real Internet is absolutely infeasible.
364
365 Moreover, Cisco "wise men" put GRE key to the third word
366 in GRE header. It makes impossible maintaining even soft state for keyed
367 GRE tunnels with enabled checksum. Tell them "thank you".
368
369 Well, I wonder, rfc1812 was written by Cisco employee,
370 what the hell these idiots break standrads established
371 by themself???
372 */
373
374 struct iphdr *iph = (struct iphdr*)skb->data;
d5a0a1e3 375 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 376 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
377 const int type = icmp_hdr(skb)->type;
378 const int code = icmp_hdr(skb)->code;
1da177e4 379 struct ip_tunnel *t;
d5a0a1e3 380 __be16 flags;
1da177e4
LT
381
382 flags = p[0];
383 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
384 if (flags&(GRE_VERSION|GRE_ROUTING))
385 return;
386 if (flags&GRE_KEY) {
387 grehlen += 4;
388 if (flags&GRE_CSUM)
389 grehlen += 4;
390 }
391 }
392
393 /* If only 8 bytes returned, keyed message will be dropped here */
394 if (skb_headlen(skb) < grehlen)
395 return;
396
397 switch (type) {
398 default:
399 case ICMP_PARAMETERPROB:
400 return;
401
402 case ICMP_DEST_UNREACH:
403 switch (code) {
404 case ICMP_SR_FAILED:
405 case ICMP_PORT_UNREACH:
406 /* Impossible event. */
407 return;
408 case ICMP_FRAG_NEEDED:
409 /* Soft state for pmtu is maintained by IP core. */
410 return;
411 default:
412 /* All others are translated to HOST_UNREACH.
413 rfc2003 contains "deep thoughts" about NET_UNREACH,
414 I believe they are just ether pollution. --ANK
415 */
416 break;
417 }
418 break;
419 case ICMP_TIME_EXCEEDED:
420 if (code != ICMP_EXC_TTL)
421 return;
422 break;
423 }
424
425 read_lock(&ipgre_lock);
3b4667f3 426 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
e1a80002
HX
427 flags & GRE_KEY ?
428 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
429 p[1]);
f97c1e0c
JP
430 if (t == NULL || t->parms.iph.daddr == 0 ||
431 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
432 goto out;
433
434 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
435 goto out;
436
437 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
438 t->err_count++;
439 else
440 t->err_count = 1;
441 t->err_time = jiffies;
442out:
443 read_unlock(&ipgre_lock);
444 return;
1da177e4
LT
445}
446
447static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
448{
449 if (INET_ECN_is_ce(iph->tos)) {
450 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 451 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 452 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 453 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
454 }
455 }
456}
457
458static inline u8
459ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
460{
461 u8 inner = 0;
462 if (skb->protocol == htons(ETH_P_IP))
463 inner = old_iph->tos;
464 else if (skb->protocol == htons(ETH_P_IPV6))
465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
466 return INET_ECN_encapsulate(tos, inner);
467}
468
/*
 * ipgre_rcv - receive handler for incoming GRE packets.
 *
 * Parses the GRE header at skb->data (flags, then the optional checksum,
 * key and sequence number words), looks up the receiving tunnel from the
 * outer addresses, key and GRE protocol, checks the packet against the
 * tunnel's input flags, strips the GRE header and hands the inner packet
 * to netif_rx() on the tunnel device.  If no tunnel matches, an ICMP
 * port-unreachable is sent.  The skb is freed on every drop path.
 *
 * Always returns 0.
 */
469static int ipgre_rcv(struct sk_buff *skb)
470{
471 struct iphdr *iph;
472 u8 *h;
d5a0a1e3 473 __be16 flags;
d3bc23e7 474 __sum16 csum = 0;
d5a0a1e3 475 __be32 key = 0;
1da177e4
LT
476 u32 seqno = 0;
477 struct ip_tunnel *tunnel;
/* offset: length of the GRE header parsed so far; starts at the 4-byte base header */
478 int offset = 4;
e1a80002 479 __be16 gre_proto;
1da177e4
LT
480
/* 16 bytes = base header plus the three optional 4-byte words read below */
481 if (!pskb_may_pull(skb, 16))
482 goto drop_nolock;
483
eddc9ec5 484 iph = ip_hdr(skb);
1da177e4 485 h = skb->data;
d5a0a1e3 486 flags = *(__be16*)h;
1da177e4
LT
487
488 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
489 /* - Version must be 0.
490 - We do not support routing headers.
491 */
492 if (flags&(GRE_VERSION|GRE_ROUTING))
493 goto drop_nolock;
494
495 if (flags&GRE_CSUM) {
/* csum ends up non-zero if verification failed; it is acted on after the tunnel lookup */
fb286bb2 496 switch (skb->ip_summed) {
84fa7933 497 case CHECKSUM_COMPLETE:
d3bc23e7 498 csum = csum_fold(skb->csum);
fb286bb2
HX
499 if (!csum)
500 break;
501 /* fall through */
502 case CHECKSUM_NONE:
503 skb->csum = 0;
504 csum = __skb_checksum_complete(skb);
84fa7933 505 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
506 }
507 offset += 4;
508 }
509 if (flags&GRE_KEY) {
d5a0a1e3 510 key = *(__be32*)(h + offset);
1da177e4
LT
511 offset += 4;
512 }
513 if (flags&GRE_SEQ) {
d5a0a1e3 514 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
515 offset += 4;
516 }
517 }
518
e1a80002
HX
/* the protocol type is the second 16-bit word of the base header */
519 gre_proto = *(__be16 *)(h + 2);
520
1da177e4 521 read_lock(&ipgre_lock);
3b4667f3 522 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
e1a80002
HX
523 iph->saddr, iph->daddr, key,
524 gre_proto))) {
addd68eb
PE
525 struct net_device_stats *stats = &tunnel->dev->stats;
526
1da177e4
LT
527 secpath_reset(skb);
528
e1a80002 529 skb->protocol = gre_proto;
1da177e4
LT
530 /* WCCP version 1 and 2 protocol decoding.
531 * - Change protocol to IP
532 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
533 */
e1a80002 534 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 535 skb->protocol = htons(ETH_P_IP);
/* if the next byte is not the start of an IPv4 header (version nibble 4), skip 4 more bytes */
e905a9ed 536 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
537 offset += 4;
538 }
539
1d069167 540 skb->mac_header = skb->network_header;
/* strip the GRE header and adjust the receive checksum for the removed bytes */
4209fb60 541 __pskb_pull(skb, offset);
9c70220b 542 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
543 skb->pkt_type = PACKET_HOST;
544#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 545 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 546 /* Looped back packet, drop it! */
ee6b9673 547 if (skb->rtable->fl.iif == 0)
1da177e4 548 goto drop;
addd68eb 549 stats->multicast++;
1da177e4
LT
550 skb->pkt_type = PACKET_BROADCAST;
551 }
552#endif
553
/* drop on a bad checksum, or when the tunnel requires a checksum and the packet has none */
554 if (((flags&GRE_CSUM) && csum) ||
555 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
556 stats->rx_crc_errors++;
557 stats->rx_errors++;
1da177e4
LT
558 goto drop;
559 }
/* when the tunnel requires sequencing, drop packets without a sequence number or older than expected */
560 if (tunnel->parms.i_flags&GRE_SEQ) {
561 if (!(flags&GRE_SEQ) ||
562 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
563 stats->rx_fifo_errors++;
564 stats->rx_errors++;
1da177e4
LT
565 goto drop;
566 }
567 tunnel->i_seqno = seqno + 1;
568 }
e1a80002
HX
569
570 /* Warning: All skb pointers will be invalidated! */
571 if (tunnel->dev->type == ARPHRD_ETHER) {
572 if (!pskb_may_pull(skb, ETH_HLEN)) {
573 stats->rx_length_errors++;
574 stats->rx_errors++;
575 goto drop;
576 }
577
/* re-read the outer header pointer after the pull above */
578 iph = ip_hdr(skb);
579 skb->protocol = eth_type_trans(skb, tunnel->dev);
580 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
581 }
582
addd68eb
PE
583 stats->rx_packets++;
584 stats->rx_bytes += skb->len;
1da177e4
LT
585 skb->dev = tunnel->dev;
586 dst_release(skb->dst);
587 skb->dst = NULL;
588 nf_reset(skb);
e1a80002
HX
589
590 skb_reset_network_header(skb);
/* iph still refers to the outer header; propagate its ECN CE mark to the inner header */
1da177e4 591 ipgre_ecn_decapsulate(iph, skb);
e1a80002 592
1da177e4
LT
593 netif_rx(skb);
594 read_unlock(&ipgre_lock);
595 return(0);
596 }
/* no tunnel matched: report port unreachable, then fall through to free the skb (lock still held) */
45af08be 597 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
598
599drop:
600 read_unlock(&ipgre_lock);
601drop_nolock:
602 kfree_skb(skb);
603 return(0);
604}
605
/*
 * ipgre_tunnel_xmit - transmit a packet through a GRE tunnel device.
 *
 * Picks the outer destination (the configured one, or one derived from
 * the packet's route for NBMA tunnels), routes the outer packet, applies
 * path-MTU checks, makes sure there is enough headroom, then prepends the
 * outer IPv4 header and the GRE header (with optional checksum, key and
 * sequence number) and sends the result through IPTUNNEL_XMIT().
 *
 * tunnel->recursion is used as a re-entrancy guard and is decremented
 * again on every exit path.  The function always returns 0; on error the
 * skb is freed and an error counter is incremented.
 *
 * NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this file and
 * presumably refers to locals of this function by name — confirm before
 * renaming any local variable here.
 */
606static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
607{
2941a486 608 struct ip_tunnel *tunnel = netdev_priv(dev);
addd68eb 609 struct net_device_stats *stats = &tunnel->dev->stats;
eddc9ec5 610 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
611 struct iphdr *tiph;
612 u8 tos;
d5a0a1e3 613 __be16 df;
1da177e4
LT
614 struct rtable *rt; /* Route to the other host */
615 struct net_device *tdev; /* Device to other host */
616 struct iphdr *iph; /* Our new IP header */
c2636b4d 617 unsigned int max_headroom; /* The extra header space needed */
1da177e4 618 int gre_hlen;
d5a0a1e3 619 __be32 dst;
1da177e4
LT
620 int mtu;
621
/* non-zero means we re-entered this tunnel's transmit path: count a collision and drop */
622 if (tunnel->recursion++) {
addd68eb 623 stats->collisions++;
1da177e4
LT
624 goto tx_error;
625 }
626
e1a80002
HX
627 if (dev->type == ARPHRD_ETHER)
628 IPCB(skb)->flags = 0;
629
/* with header_ops on an IPGRE device the outer header is already at skb->data, so use it as the template and push nothing */
630 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4
LT
631 gre_hlen = 0;
632 tiph = (struct iphdr*)skb->data;
633 } else {
634 gre_hlen = tunnel->hlen;
635 tiph = &tunnel->parms.iph;
636 }
637
638 if ((dst = tiph->daddr) == 0) {
639 /* NBMA tunnel */
640
641 if (skb->dst == NULL) {
addd68eb 642 stats->tx_fifo_errors++;
1da177e4
LT
643 goto tx_error;
644 }
645
/* IPv4 payload: the outer destination is the gateway of the packet's route */
646 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 647 rt = skb->rtable;
1da177e4
LT
648 if ((dst = rt->rt_gateway) == 0)
649 goto tx_error_icmp;
650 }
651#ifdef CONFIG_IPV6
/* IPv6 payload: take the IPv4 address embedded in a v4-compatible neighbour or destination address */
652 else if (skb->protocol == htons(ETH_P_IPV6)) {
653 struct in6_addr *addr6;
654 int addr_type;
655 struct neighbour *neigh = skb->dst->neighbour;
656
657 if (neigh == NULL)
658 goto tx_error;
659
660 addr6 = (struct in6_addr*)&neigh->primary_key;
661 addr_type = ipv6_addr_type(addr6);
662
663 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 664 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
665 addr_type = ipv6_addr_type(addr6);
666 }
667
668 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
669 goto tx_error_icmp;
670
671 dst = addr6->s6_addr32[3];
672 }
673#endif
674 else
675 goto tx_error;
676 }
677
/* low bit of the template TOS set: take the TOS from the inner IPv4 header, then clear that bit */
678 tos = tiph->tos;
679 if (tos&1) {
680 if (skb->protocol == htons(ETH_P_IP))
681 tos = old_iph->tos;
682 tos &= ~1;
683 }
684
685 {
686 struct flowi fl = { .oif = tunnel->parms.link,
687 .nl_u = { .ip4_u =
688 { .daddr = dst,
689 .saddr = tiph->saddr,
690 .tos = RT_TOS(tos) } },
691 .proto = IPPROTO_GRE };
96635522 692 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 693 stats->tx_carrier_errors++;
1da177e4
LT
694 goto tx_error;
695 }
696 }
697 tdev = rt->u.dst.dev;
698
/* the outer route points back at this tunnel device: refuse to loop */
699 if (tdev == dev) {
700 ip_rt_put(rt);
addd68eb 701 stats->collisions++;
1da177e4
LT
702 goto tx_error;
703 }
704
/* with a non-zero template frag_off the usable mtu is the route mtu minus our headers; otherwise use the inner route's mtu or the device mtu */
705 df = tiph->frag_off;
706 if (df)
c95b819a 707 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4
LT
708 else
709 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
710
711 if (skb->dst)
712 skb->dst->ops->update_pmtu(skb->dst, mtu);
713
714 if (skb->protocol == htons(ETH_P_IP)) {
715 df |= (old_iph->frag_off&htons(IP_DF));
716
/* inner packet has DF set and does not fit: report the mtu to the sender and drop */
717 if ((old_iph->frag_off&htons(IP_DF)) &&
718 mtu < ntohs(old_iph->tot_len)) {
719 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
720 ip_rt_put(rt);
721 goto tx_error;
722 }
723 }
724#ifdef CONFIG_IPV6
725 else if (skb->protocol == htons(ETH_P_IPV6)) {
726 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
727
728 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
729 if ((tunnel->parms.iph.daddr &&
730 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
731 rt6->rt6i_dst.plen == 128) {
732 rt6->rt6i_flags |= RTF_MODIFIED;
733 skb->dst->metrics[RTAX_MTU-1] = mtu;
734 }
735 }
736
737 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
738 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
739 ip_rt_put(rt);
740 goto tx_error;
741 }
742 }
743#endif
744
/* err_count/err_time are maintained by ipgre_err(); while errors are recent, signal link failure for this packet */
745 if (tunnel->err_count > 0) {
746 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
747 tunnel->err_count--;
748
749 dst_link_failure(skb);
750 } else
751 tunnel->err_count = 0;
752 }
753
754 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
755
cfbba49d
PM
/* get a private skb with enough headroom if the current one is too small, shared, or a non-writable clone */
756 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
757 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
758 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
759 if (!new_skb) {
760 ip_rt_put(rt);
e905a9ed 761 stats->tx_dropped++;
1da177e4
LT
762 dev_kfree_skb(skb);
763 tunnel->recursion--;
764 return 0;
765 }
766 if (skb->sk)
767 skb_set_owner_w(new_skb, skb->sk);
768 dev_kfree_skb(skb);
769 skb = new_skb;
/* the data moved to a new buffer, so re-read the inner header pointer */
eddc9ec5 770 old_iph = ip_hdr(skb);
1da177e4
LT
771 }
772
b0e380b1 773 skb->transport_header = skb->network_header;
e2d1bca7
ACM
774 skb_push(skb, gre_hlen);
775 skb_reset_network_header(skb);
1da177e4 776 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
777 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
778 IPSKB_REROUTED);
1da177e4
LT
/* replace the packet's route with the outer route obtained above */
779 dst_release(skb->dst);
780 skb->dst = &rt->u.dst;
781
782 /*
783 * Push down and install the IPIP header.
784 */
785
eddc9ec5 786 iph = ip_hdr(skb);
1da177e4
LT
787 iph->version = 4;
788 iph->ihl = sizeof(struct iphdr) >> 2;
789 iph->frag_off = df;
790 iph->protocol = IPPROTO_GRE;
791 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
792 iph->daddr = rt->rt_dst;
793 iph->saddr = rt->rt_src;
794
/* template ttl of 0 means inherit: from the inner IPv4/IPv6 header, else from the route's hop limit metric */
795 if ((iph->ttl = tiph->ttl) == 0) {
796 if (skb->protocol == htons(ETH_P_IP))
797 iph->ttl = old_iph->ttl;
798#ifdef CONFIG_IPV6
799 else if (skb->protocol == htons(ETH_P_IPV6))
800 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
801#endif
802 else
803 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
804 }
805
e1a80002
HX
/* base GRE header: flags word, then protocol type (ETH_P_TEB for Ethernet-type tunnel devices) */
806 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
807 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
808 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
809
810 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
/* ptr starts at the last word of the header; options are filled in back to front: sequence, key, checksum */
d5a0a1e3 811 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
812
813 if (tunnel->parms.o_flags&GRE_SEQ) {
814 ++tunnel->o_seqno;
815 *ptr = htonl(tunnel->o_seqno);
816 ptr--;
817 }
818 if (tunnel->parms.o_flags&GRE_KEY) {
819 *ptr = tunnel->parms.o_key;
820 ptr--;
821 }
822 if (tunnel->parms.o_flags&GRE_CSUM) {
/* zero the checksum word first, then checksum everything after the outer IP header */
823 *ptr = 0;
5f92a738 824 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
825 }
826 }
827
828 nf_reset(skb);
829
830 IPTUNNEL_XMIT();
831 tunnel->recursion--;
832 return 0;
833
834tx_error_icmp:
835 dst_link_failure(skb);
836
837tx_error:
838 stats->tx_errors++;
839 dev_kfree_skb(skb);
840 tunnel->recursion--;
841 return 0;
842}
843
42aa9162 844static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
845{
846 struct net_device *tdev = NULL;
847 struct ip_tunnel *tunnel;
848 struct iphdr *iph;
849 int hlen = LL_MAX_HEADER;
850 int mtu = ETH_DATA_LEN;
851 int addend = sizeof(struct iphdr) + 4;
852
853 tunnel = netdev_priv(dev);
854 iph = &tunnel->parms.iph;
855
c95b819a 856 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
857
858 if (iph->daddr) {
859 struct flowi fl = { .oif = tunnel->parms.link,
860 .nl_u = { .ip4_u =
861 { .daddr = iph->daddr,
862 .saddr = iph->saddr,
863 .tos = RT_TOS(iph->tos) } },
864 .proto = IPPROTO_GRE };
865 struct rtable *rt;
96635522 866 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
867 tdev = rt->u.dst.dev;
868 ip_rt_put(rt);
869 }
e1a80002
HX
870
871 if (dev->type != ARPHRD_ETHER)
872 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
873 }
874
875 if (!tdev && tunnel->parms.link)
96635522 876 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
877
878 if (tdev) {
c95b819a 879 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
880 mtu = tdev->mtu;
881 }
882 dev->iflink = tunnel->parms.link;
883
884 /* Precalculate GRE options length */
885 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
886 if (tunnel->parms.o_flags&GRE_CSUM)
887 addend += 4;
888 if (tunnel->parms.o_flags&GRE_KEY)
889 addend += 4;
890 if (tunnel->parms.o_flags&GRE_SEQ)
891 addend += 4;
892 }
c95b819a 893 dev->needed_headroom = addend + hlen;
42aa9162
HX
894 mtu -= dev->hard_header_len - addend;
895
896 if (mtu < 68)
897 mtu = 68;
898
ee34c1eb
MS
899 tunnel->hlen = addend;
900
42aa9162 901 return mtu;
ee34c1eb
MS
902}
903
1da177e4
LT
/*
 * ipgre_tunnel_ioctl - tunnel configuration ioctls.
 *
 * The user buffer at ifr->ifr_ifru.ifru_data holds a struct
 * ip_tunnel_parm.  Commands handled:
 *   SIOCGETTUNNEL  - copy a tunnel's parameters to user space.  On the
 *                    fallback device the tunnel is looked up from the
 *                    user-supplied parameters; otherwise (or when the
 *                    lookup fails) this device's own parameters are used.
 *   SIOCADDTUNNEL  - create a tunnel (or return the existing match).
 *   SIOCCHGTUNNEL  - change a tunnel's parameters.
 *   SIOCDELTUNNEL  - unregister a tunnel device; the fallback device
 *                    itself cannot be deleted.
 * Add, change and delete require CAP_NET_ADMIN.
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EPERM, -EINVAL,
 * -EEXIST, -ENOBUFS, -ENOENT).
 */
904static int
905ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
906{
907 int err = 0;
908 struct ip_tunnel_parm p;
909 struct ip_tunnel *t;
f57e7d5a
PE
910 struct net *net = dev_net(dev);
911 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
912
913 switch (cmd) {
914 case SIOCGETTUNNEL:
915 t = NULL;
7daa0004 916 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
917 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
918 err = -EFAULT;
919 break;
920 }
f57e7d5a 921 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
922 }
/* no match (or not the fallback device): report this device's own parameters */
923 if (t == NULL)
2941a486 924 t = netdev_priv(dev);
1da177e4
LT
925 memcpy(&p, &t->parms, sizeof(p));
926 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
927 err = -EFAULT;
928 break;
929
930 case SIOCADDTUNNEL:
931 case SIOCCHGTUNNEL:
932 err = -EPERM;
933 if (!capable(CAP_NET_ADMIN))
934 goto done;
935
936 err = -EFAULT;
937 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
938 goto done;
939
/* accept only an option-less IPv4/GRE template, no frag_off bits besides DF, and no GRE version/routing flags */
940 err = -EINVAL;
941 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
942 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
943 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
944 goto done;
/* a fixed ttl forces DF on the outer header */
945 if (p.iph.ttl)
946 p.iph.frag_off |= htons(IP_DF);
947
/* keys are only meaningful when the matching GRE_KEY flag is set */
948 if (!(p.i_flags&GRE_KEY))
949 p.i_key = 0;
950 if (!(p.o_flags&GRE_KEY))
951 p.o_key = 0;
952
f57e7d5a 953 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 954
7daa0004 955 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
956 if (t != NULL) {
/* the requested parameters already belong to a different device */
957 if (t->dev != dev) {
958 err = -EEXIST;
959 break;
960 }
961 } else {
962 unsigned nflags=0;
963
2941a486 964 t = netdev_priv(dev);
1da177e4 965
f97c1e0c 966 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
967 nflags = IFF_BROADCAST;
968 else if (p.iph.daddr)
969 nflags = IFF_POINTOPOINT;
970
/* the new remote address must not change the device's broadcast / point-to-point nature */
971 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
972 err = -EINVAL;
973 break;
974 }
/* addresses and key determine the hash bucket, so unlink, update, then relink */
f57e7d5a 975 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
976 t->parms.iph.saddr = p.iph.saddr;
977 t->parms.iph.daddr = p.iph.daddr;
978 t->parms.i_key = p.i_key;
979 t->parms.o_key = p.o_key;
980 memcpy(dev->dev_addr, &p.iph.saddr, 4);
981 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 982 ipgre_tunnel_link(ign, t);
1da177e4
LT
983 netdev_state_change(dev);
984 }
985 }
986
987 if (t) {
988 err = 0;
989 if (cmd == SIOCCHGTUNNEL) {
990 t->parms.iph.ttl = p.iph.ttl;
991 t->parms.iph.tos = p.iph.tos;
992 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
/* a changed link may mean a different lower device: recompute header sizes and mtu */
993 if (t->parms.link != p.link) {
994 t->parms.link = p.link;
42aa9162 995 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
996 netdev_state_change(dev);
997 }
1da177e4
LT
998 }
999 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1000 err = -EFAULT;
1001 } else
1002 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1003 break;
1004
1005 case SIOCDELTUNNEL:
1006 err = -EPERM;
1007 if (!capable(CAP_NET_ADMIN))
1008 goto done;
1009
/* on the fallback device, the tunnel to delete is named by the user-supplied parameters */
7daa0004 1010 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1011 err = -EFAULT;
1012 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1013 goto done;
1014 err = -ENOENT;
f57e7d5a 1015 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1016 goto done;
1017 err = -EPERM;
7daa0004 1018 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1019 goto done;
1020 dev = t->dev;
1021 }
22f8cde5
SH
1022 unregister_netdevice(dev);
1023 err = 0;
1da177e4
LT
1024 break;
1025
1026 default:
1027 err = -EINVAL;
1028 }
1029
1030done:
1031 return err;
1032}
1033
1da177e4
LT
1034static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1035{
2941a486 1036 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1037 if (new_mtu < 68 ||
1038 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1039 return -EINVAL;
1040 dev->mtu = new_mtu;
1041 return 0;
1042}
1043
1da177e4
LT
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1072
3b04ddde
SH
1073static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1074 unsigned short type,
1075 const void *daddr, const void *saddr, unsigned len)
1da177e4 1076{
2941a486 1077 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1078 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1079 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1080
1081 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1082 p[0] = t->parms.o_flags;
1083 p[1] = htons(type);
1084
1085 /*
e905a9ed 1086 * Set the source hardware address.
1da177e4 1087 */
e905a9ed 1088
1da177e4
LT
1089 if (saddr)
1090 memcpy(&iph->saddr, saddr, 4);
1091
1092 if (daddr) {
1093 memcpy(&iph->daddr, daddr, 4);
1094 return t->hlen;
1095 }
f97c1e0c 1096 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1097 return t->hlen;
e905a9ed 1098
1da177e4
LT
1099 return -t->hlen;
1100}
1101
6a5f44d7
TT
1102static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1103{
1104 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1105 memcpy(haddr, &iph->saddr, 4);
1106 return 4;
1107}
1108
3b04ddde
SH
1109static const struct header_ops ipgre_header_ops = {
1110 .create = ipgre_header,
6a5f44d7 1111 .parse = ipgre_header_parse,
3b04ddde
SH
1112};
1113
6a5f44d7 1114#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1115static int ipgre_open(struct net_device *dev)
1116{
2941a486 1117 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1118
f97c1e0c 1119 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1120 struct flowi fl = { .oif = t->parms.link,
1121 .nl_u = { .ip4_u =
1122 { .daddr = t->parms.iph.daddr,
1123 .saddr = t->parms.iph.saddr,
1124 .tos = RT_TOS(t->parms.iph.tos) } },
1125 .proto = IPPROTO_GRE };
1126 struct rtable *rt;
96635522 1127 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1128 return -EADDRNOTAVAIL;
1129 dev = rt->u.dst.dev;
1130 ip_rt_put(rt);
e5ed6399 1131 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1132 return -EADDRNOTAVAIL;
1133 t->mlink = dev->ifindex;
e5ed6399 1134 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1135 }
1136 return 0;
1137}
1138
1139static int ipgre_close(struct net_device *dev)
1140{
2941a486 1141 struct ip_tunnel *t = netdev_priv(dev);
f97c1e0c 1142 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1143 struct in_device *in_dev;
c346dca1 1144 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1145 if (in_dev) {
1146 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1147 in_dev_put(in_dev);
1148 }
1149 }
1150 return 0;
1151}
1152
1153#endif
1154
1155static void ipgre_tunnel_setup(struct net_device *dev)
1156{
c19e654d 1157 dev->init = ipgre_tunnel_init;
1da177e4
LT
1158 dev->uninit = ipgre_tunnel_uninit;
1159 dev->destructor = free_netdev;
1160 dev->hard_start_xmit = ipgre_tunnel_xmit;
1da177e4
LT
1161 dev->do_ioctl = ipgre_tunnel_ioctl;
1162 dev->change_mtu = ipgre_tunnel_change_mtu;
1163
1164 dev->type = ARPHRD_IPGRE;
c95b819a 1165 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1166 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1167 dev->flags = IFF_NOARP;
1168 dev->iflink = 0;
1169 dev->addr_len = 4;
0b67eceb 1170 dev->features |= NETIF_F_NETNS_LOCAL;
1da177e4
LT
1171}
1172
1173static int ipgre_tunnel_init(struct net_device *dev)
1174{
1da177e4
LT
1175 struct ip_tunnel *tunnel;
1176 struct iphdr *iph;
1da177e4 1177
2941a486 1178 tunnel = netdev_priv(dev);
1da177e4
LT
1179 iph = &tunnel->parms.iph;
1180
1181 tunnel->dev = dev;
1182 strcpy(tunnel->parms.name, dev->name);
1183
1184 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1185 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1186
1da177e4 1187 if (iph->daddr) {
1da177e4 1188#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1189 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1190 if (!iph->saddr)
1191 return -EINVAL;
1192 dev->flags = IFF_BROADCAST;
3b04ddde 1193 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1194 dev->open = ipgre_open;
1195 dev->stop = ipgre_close;
1196 }
1197#endif
ee34c1eb 1198 } else
6a5f44d7 1199 dev->header_ops = &ipgre_header_ops;
1da177e4 1200
1da177e4
LT
1201 return 0;
1202}
1203
7daa0004 1204static int ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1205{
2941a486 1206 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1207 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1208 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1209
1210 tunnel->dev = dev;
1211 strcpy(tunnel->parms.name, dev->name);
1212
1213 iph->version = 4;
1214 iph->protocol = IPPROTO_GRE;
1215 iph->ihl = 5;
1216 tunnel->hlen = sizeof(struct iphdr) + 4;
1217
1218 dev_hold(dev);
eb8ce741 1219 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1220 return 0;
1221}
1222
1223
1224static struct net_protocol ipgre_protocol = {
1225 .handler = ipgre_rcv,
1226 .err_handler = ipgre_err,
f96c148f 1227 .netns_ok = 1,
1da177e4
LT
1228};
1229
eb8ce741
PE
1230static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1231{
1232 int prio;
1233
1234 for (prio = 0; prio < 4; prio++) {
1235 int h;
1236 for (h = 0; h < HASH_SIZE; h++) {
1237 struct ip_tunnel *t;
1238 while ((t = ign->tunnels[prio][h]) != NULL)
1239 unregister_netdevice(t->dev);
1240 }
1241 }
1242}
1243
59a4c759
PE
1244static int ipgre_init_net(struct net *net)
1245{
1246 int err;
1247 struct ipgre_net *ign;
1248
1249 err = -ENOMEM;
eb8ce741 1250 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1251 if (ign == NULL)
1252 goto err_alloc;
1253
1254 err = net_assign_generic(net, ipgre_net_id, ign);
1255 if (err < 0)
1256 goto err_assign;
1257
7daa0004
PE
1258 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1259 ipgre_tunnel_setup);
1260 if (!ign->fb_tunnel_dev) {
1261 err = -ENOMEM;
1262 goto err_alloc_dev;
1263 }
1264
1265 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1266 dev_net_set(ign->fb_tunnel_dev, net);
c19e654d 1267 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1268
1269 if ((err = register_netdev(ign->fb_tunnel_dev)))
1270 goto err_reg_dev;
1271
59a4c759
PE
1272 return 0;
1273
7daa0004
PE
1274err_reg_dev:
1275 free_netdev(ign->fb_tunnel_dev);
1276err_alloc_dev:
1277 /* nothing */
59a4c759
PE
1278err_assign:
1279 kfree(ign);
1280err_alloc:
1281 return err;
1282}
1283
1284static void ipgre_exit_net(struct net *net)
1285{
1286 struct ipgre_net *ign;
1287
1288 ign = net_generic(net, ipgre_net_id);
7daa0004 1289 rtnl_lock();
eb8ce741 1290 ipgre_destroy_tunnels(ign);
7daa0004 1291 rtnl_unlock();
59a4c759
PE
1292 kfree(ign);
1293}
1294
1295static struct pernet_operations ipgre_net_ops = {
1296 .init = ipgre_init_net,
1297 .exit = ipgre_exit_net,
1298};
1da177e4 1299
c19e654d
HX
1300static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1301{
1302 __be16 flags;
1303
1304 if (!data)
1305 return 0;
1306
1307 flags = 0;
1308 if (data[IFLA_GRE_IFLAGS])
1309 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1310 if (data[IFLA_GRE_OFLAGS])
1311 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1312 if (flags & (GRE_VERSION|GRE_ROUTING))
1313 return -EINVAL;
1314
1315 return 0;
1316}
1317
e1a80002
HX
1318static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1319{
1320 __be32 daddr;
1321
1322 if (tb[IFLA_ADDRESS]) {
1323 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1324 return -EINVAL;
1325 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1326 return -EADDRNOTAVAIL;
1327 }
1328
1329 if (!data)
1330 goto out;
1331
1332 if (data[IFLA_GRE_REMOTE]) {
1333 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1334 if (!daddr)
1335 return -EINVAL;
1336 }
1337
1338out:
1339 return ipgre_tunnel_validate(tb, data);
1340}
1341
c19e654d
HX
1342static void ipgre_netlink_parms(struct nlattr *data[],
1343 struct ip_tunnel_parm *parms)
1344{
1345 memset(parms, 0, sizeof(parms));
1346
1347 parms->iph.protocol = IPPROTO_GRE;
1348
1349 if (!data)
1350 return;
1351
1352 if (data[IFLA_GRE_LINK])
1353 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1354
1355 if (data[IFLA_GRE_IFLAGS])
1356 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1357
1358 if (data[IFLA_GRE_OFLAGS])
1359 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1360
1361 if (data[IFLA_GRE_IKEY])
1362 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1363
1364 if (data[IFLA_GRE_OKEY])
1365 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1366
1367 if (data[IFLA_GRE_LOCAL])
1368 memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
1369
1370 if (data[IFLA_GRE_REMOTE])
1371 memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1372
1373 if (data[IFLA_GRE_TTL])
1374 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1375
1376 if (data[IFLA_GRE_TOS])
1377 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1378
1379 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1380 parms->iph.frag_off = htons(IP_DF);
1381}
1382
e1a80002
HX
1383static int ipgre_tap_init(struct net_device *dev)
1384{
1385 struct ip_tunnel *tunnel;
1386
1387 tunnel = netdev_priv(dev);
1388
1389 tunnel->dev = dev;
1390 strcpy(tunnel->parms.name, dev->name);
1391
1392 ipgre_tunnel_bind_dev(dev);
1393
1394 return 0;
1395}
1396
1397static void ipgre_tap_setup(struct net_device *dev)
1398{
1399
1400 ether_setup(dev);
1401
1402 dev->init = ipgre_tap_init;
1403 dev->uninit = ipgre_tunnel_uninit;
1404 dev->destructor = free_netdev;
1405 dev->hard_start_xmit = ipgre_tunnel_xmit;
1406 dev->change_mtu = ipgre_tunnel_change_mtu;
1407
1408 dev->iflink = 0;
1409 dev->features |= NETIF_F_NETNS_LOCAL;
1410}
1411
c19e654d
HX
1412static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1413 struct nlattr *data[])
1414{
1415 struct ip_tunnel *nt;
1416 struct net *net = dev_net(dev);
1417 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1418 int mtu;
1419 int err;
1420
1421 nt = netdev_priv(dev);
1422 ipgre_netlink_parms(data, &nt->parms);
1423
e1a80002 1424 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1425 return -EEXIST;
1426
e1a80002
HX
1427 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1428 random_ether_addr(dev->dev_addr);
1429
c19e654d
HX
1430 mtu = ipgre_tunnel_bind_dev(dev);
1431 if (!tb[IFLA_MTU])
1432 dev->mtu = mtu;
1433
1434 err = register_netdevice(dev);
1435 if (err)
1436 goto out;
1437
1438 dev_hold(dev);
1439 ipgre_tunnel_link(ign, nt);
1440
1441out:
1442 return err;
1443}
1444
1445static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1446 struct nlattr *data[])
1447{
1448 struct ip_tunnel *t, *nt;
1449 struct net *net = dev_net(dev);
1450 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1451 struct ip_tunnel_parm p;
1452 int mtu;
1453
1454 if (dev == ign->fb_tunnel_dev)
1455 return -EINVAL;
1456
1457 nt = netdev_priv(dev);
1458 ipgre_netlink_parms(data, &p);
1459
1460 t = ipgre_tunnel_locate(net, &p, 0);
1461
1462 if (t) {
1463 if (t->dev != dev)
1464 return -EEXIST;
1465 } else {
1466 unsigned nflags = 0;
1467
1468 t = nt;
1469
1470 if (ipv4_is_multicast(p.iph.daddr))
1471 nflags = IFF_BROADCAST;
1472 else if (p.iph.daddr)
1473 nflags = IFF_POINTOPOINT;
1474
1475 if ((dev->flags ^ nflags) &
1476 (IFF_POINTOPOINT | IFF_BROADCAST))
1477 return -EINVAL;
1478
1479 ipgre_tunnel_unlink(ign, t);
1480 t->parms.iph.saddr = p.iph.saddr;
1481 t->parms.iph.daddr = p.iph.daddr;
1482 t->parms.i_key = p.i_key;
1483 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1484 memcpy(dev->broadcast, &p.iph.daddr, 4);
1485 ipgre_tunnel_link(ign, t);
1486 netdev_state_change(dev);
1487 }
1488
1489 t->parms.o_key = p.o_key;
1490 t->parms.iph.ttl = p.iph.ttl;
1491 t->parms.iph.tos = p.iph.tos;
1492 t->parms.iph.frag_off = p.iph.frag_off;
1493
1494 if (t->parms.link != p.link) {
1495 t->parms.link = p.link;
1496 mtu = ipgre_tunnel_bind_dev(dev);
1497 if (!tb[IFLA_MTU])
1498 dev->mtu = mtu;
1499 netdev_state_change(dev);
1500 }
1501
1502 return 0;
1503}
1504
1505static size_t ipgre_get_size(const struct net_device *dev)
1506{
1507 return
1508 /* IFLA_GRE_LINK */
1509 nla_total_size(4) +
1510 /* IFLA_GRE_IFLAGS */
1511 nla_total_size(2) +
1512 /* IFLA_GRE_OFLAGS */
1513 nla_total_size(2) +
1514 /* IFLA_GRE_IKEY */
1515 nla_total_size(4) +
1516 /* IFLA_GRE_OKEY */
1517 nla_total_size(4) +
1518 /* IFLA_GRE_LOCAL */
1519 nla_total_size(4) +
1520 /* IFLA_GRE_REMOTE */
1521 nla_total_size(4) +
1522 /* IFLA_GRE_TTL */
1523 nla_total_size(1) +
1524 /* IFLA_GRE_TOS */
1525 nla_total_size(1) +
1526 /* IFLA_GRE_PMTUDISC */
1527 nla_total_size(1) +
1528 0;
1529}
1530
1531static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1532{
1533 struct ip_tunnel *t = netdev_priv(dev);
1534 struct ip_tunnel_parm *p = &t->parms;
1535
1536 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1537 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1538 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1539 NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
1540 NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
1541 NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
1542 NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
1543 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1544 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1545 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1546
1547 return 0;
1548
1549nla_put_failure:
1550 return -EMSGSIZE;
1551}
1552
1553static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1554 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1555 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1556 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1557 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1558 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1559 [IFLA_GRE_LOCAL] = { .len = 4 },
1560 [IFLA_GRE_REMOTE] = { .len = 4 },
1561 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1562 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1563 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1564};
1565
1566static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1567 .kind = "gre",
1568 .maxtype = IFLA_GRE_MAX,
1569 .policy = ipgre_policy,
1570 .priv_size = sizeof(struct ip_tunnel),
1571 .setup = ipgre_tunnel_setup,
1572 .validate = ipgre_tunnel_validate,
1573 .newlink = ipgre_newlink,
1574 .changelink = ipgre_changelink,
1575 .get_size = ipgre_get_size,
1576 .fill_info = ipgre_fill_info,
1577};
1578
e1a80002
HX
1579static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1580 .kind = "gretap",
1581 .maxtype = IFLA_GRE_MAX,
1582 .policy = ipgre_policy,
1583 .priv_size = sizeof(struct ip_tunnel),
1584 .setup = ipgre_tap_setup,
1585 .validate = ipgre_tap_validate,
1586 .newlink = ipgre_newlink,
1587 .changelink = ipgre_changelink,
1588 .get_size = ipgre_get_size,
1589 .fill_info = ipgre_fill_info,
1590};
1591
1da177e4
LT
1592/*
1593 * And now the modules code and kernel interface.
1594 */
1595
1596static int __init ipgre_init(void)
1597{
1598 int err;
1599
1600 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1601
1602 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1603 printk(KERN_INFO "ipgre init: can't add protocol\n");
1604 return -EAGAIN;
1605 }
1606
59a4c759
PE
1607 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1608 if (err < 0)
c19e654d 1609 goto gen_device_failed;
7daa0004 1610
c19e654d
HX
1611 err = rtnl_link_register(&ipgre_link_ops);
1612 if (err < 0)
1613 goto rtnl_link_failed;
1614
e1a80002
HX
1615 err = rtnl_link_register(&ipgre_tap_ops);
1616 if (err < 0)
1617 goto tap_ops_failed;
1618
c19e654d 1619out:
1da177e4 1620 return err;
c19e654d 1621
e1a80002
HX
1622tap_ops_failed:
1623 rtnl_link_unregister(&ipgre_link_ops);
c19e654d
HX
1624rtnl_link_failed:
1625 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1626gen_device_failed:
1627 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1628 goto out;
1da177e4
LT
1629}
1630
db44575f 1631static void __exit ipgre_fini(void)
1da177e4 1632{
e1a80002 1633 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d
HX
1634 rtnl_link_unregister(&ipgre_link_ops);
1635 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1636 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1637 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1da177e4
LT
1638}
1639
1640module_init(ipgre_init);
1641module_exit(ipgre_fini);
1642MODULE_LICENSE("GPL");
c19e654d 1643MODULE_ALIAS("rtnl-link-gre");
e1a80002 1644MODULE_ALIAS("rtnl-link-gretap");