]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
ipv6: convert tunnels to net_device_ops
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
68
e905a9ed 69 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
c19e654d 122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 125static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
129static int ipgre_fb_tunnel_init(struct net_device *dev);
130
eb8ce741
PE
131#define HASH_SIZE 16
132
59a4c759
PE
133static int ipgre_net_id;
134struct ipgre_net {
eb8ce741
PE
135 struct ip_tunnel *tunnels[4][HASH_SIZE];
136
7daa0004 137 struct net_device *fb_tunnel_dev;
59a4c759
PE
138};
139
1da177e4
LT
140/* Tunnel hash table */
141
142/*
143 4 hash tables:
144
145 3: (remote,local)
146 2: (remote,*)
147 1: (*,local)
148 0: (*,*)
149
150 We require exact key match i.e. if a key is present in packet
151 it will match only tunnel with the same key; if it is not present,
152 it will match only keyless tunnel.
153
154 All keyless packets, if not matched by configured keyless tunnels,
155 will match fallback tunnel.
156 */
157
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15).
 * The argument is parenthesised so that expression arguments such as
 * HASH(a | b) are hashed as a whole instead of being split by operator
 * precedence inside the expansion.  Note that the argument is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
1da177e4 159
eb8ce741
PE
160#define tunnels_r_l tunnels[3]
161#define tunnels_r tunnels[2]
162#define tunnels_l tunnels[1]
163#define tunnels_wc tunnels[0]
1da177e4
LT
164
165static DEFINE_RWLOCK(ipgre_lock);
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
f57e7d5a 169static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
e1a80002
HX
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
1da177e4
LT
172{
173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
175 struct ip_tunnel *t;
e1a80002 176 struct ip_tunnel *t2 = NULL;
7daa0004 177 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
178 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
179 ARPHRD_ETHER : ARPHRD_IPGRE;
1da177e4 180
eb8ce741 181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
1da177e4 182 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
e1a80002
HX
183 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
184 if (t->dev->type == dev_type)
185 return t;
186 if (t->dev->type == ARPHRD_IPGRE && !t2)
187 t2 = t;
188 }
1da177e4
LT
189 }
190 }
e1a80002 191
eb8ce741 192 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
1da177e4 193 if (remote == t->parms.iph.daddr) {
e1a80002
HX
194 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
195 if (t->dev->type == dev_type)
196 return t;
197 if (t->dev->type == ARPHRD_IPGRE && !t2)
198 t2 = t;
199 }
1da177e4
LT
200 }
201 }
e1a80002 202
eb8ce741 203 for (t = ign->tunnels_l[h1]; t; t = t->next) {
1da177e4 204 if (local == t->parms.iph.saddr ||
f97c1e0c
JP
205 (local == t->parms.iph.daddr &&
206 ipv4_is_multicast(local))) {
e1a80002
HX
207 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
208 if (t->dev->type == dev_type)
209 return t;
210 if (t->dev->type == ARPHRD_IPGRE && !t2)
211 t2 = t;
212 }
1da177e4
LT
213 }
214 }
e1a80002 215
eb8ce741 216 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
e1a80002
HX
217 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
218 if (t->dev->type == dev_type)
219 return t;
220 if (t->dev->type == ARPHRD_IPGRE && !t2)
221 t2 = t;
222 }
1da177e4
LT
223 }
224
e1a80002
HX
225 if (t2)
226 return t2;
227
7daa0004
PE
228 if (ign->fb_tunnel_dev->flags&IFF_UP)
229 return netdev_priv(ign->fb_tunnel_dev);
1da177e4
LT
230 return NULL;
231}
232
f57e7d5a
PE
233static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
234 struct ip_tunnel_parm *parms)
1da177e4 235{
5056a1ef
YH
236 __be32 remote = parms->iph.daddr;
237 __be32 local = parms->iph.saddr;
238 __be32 key = parms->i_key;
1da177e4
LT
239 unsigned h = HASH(key);
240 int prio = 0;
241
242 if (local)
243 prio |= 1;
f97c1e0c 244 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
245 prio |= 2;
246 h ^= HASH(remote);
247 }
248
eb8ce741 249 return &ign->tunnels[prio][h];
1da177e4
LT
250}
251
f57e7d5a
PE
252static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
253 struct ip_tunnel *t)
5056a1ef 254{
f57e7d5a 255 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
256}
257
f57e7d5a 258static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 259{
f57e7d5a 260 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
261
262 t->next = *tp;
263 write_lock_bh(&ipgre_lock);
264 *tp = t;
265 write_unlock_bh(&ipgre_lock);
266}
267
f57e7d5a 268static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
269{
270 struct ip_tunnel **tp;
271
f57e7d5a 272 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
273 if (t == *tp) {
274 write_lock_bh(&ipgre_lock);
275 *tp = t->next;
276 write_unlock_bh(&ipgre_lock);
277 break;
278 }
279 }
280}
281
e1a80002
HX
282static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
283 struct ip_tunnel_parm *parms,
284 int type)
1da177e4 285{
d5a0a1e3
AV
286 __be32 remote = parms->iph.daddr;
287 __be32 local = parms->iph.saddr;
288 __be32 key = parms->i_key;
e1a80002
HX
289 struct ip_tunnel *t, **tp;
290 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
291
292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
293 if (local == t->parms.iph.saddr &&
294 remote == t->parms.iph.daddr &&
295 key == t->parms.i_key &&
296 type == t->dev->type)
297 break;
298
299 return t;
300}
301
302static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
303 struct ip_tunnel_parm *parms, int create)
304{
305 struct ip_tunnel *t, *nt;
1da177e4 306 struct net_device *dev;
1da177e4 307 char name[IFNAMSIZ];
f57e7d5a 308 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 309
e1a80002
HX
310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
311 if (t || !create)
312 return t;
1da177e4
LT
313
314 if (parms->name[0])
315 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
316 else
317 sprintf(name, "gre%%d");
1da177e4
LT
318
319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
320 if (!dev)
321 return NULL;
322
0b67eceb
PE
323 dev_net_set(dev, net);
324
b37d428b
PE
325 if (strchr(name, '%')) {
326 if (dev_alloc_name(dev, name) < 0)
327 goto failed_free;
328 }
329
2941a486 330 nt = netdev_priv(dev);
1da177e4 331 nt->parms = *parms;
c19e654d 332 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 333
42aa9162
HX
334 dev->mtu = ipgre_tunnel_bind_dev(dev);
335
b37d428b
PE
336 if (register_netdevice(dev) < 0)
337 goto failed_free;
1da177e4 338
1da177e4 339 dev_hold(dev);
f57e7d5a 340 ipgre_tunnel_link(ign, nt);
1da177e4
LT
341 return nt;
342
b37d428b
PE
343failed_free:
344 free_netdev(dev);
1da177e4
LT
345 return NULL;
346}
347
348static void ipgre_tunnel_uninit(struct net_device *dev)
349{
f57e7d5a
PE
350 struct net *net = dev_net(dev);
351 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
352
353 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
354 dev_put(dev);
355}
356
357
358static void ipgre_err(struct sk_buff *skb, u32 info)
359{
1da177e4 360
071f92d0 361/* All the routers (except for Linux) return only
1da177e4
LT
362 8 bytes of packet payload. It means, that precise relaying of
363 ICMP in the real Internet is absolutely infeasible.
364
365 Moreover, Cisco "wise men" put GRE key to the third word
366 in GRE header. It makes impossible maintaining even soft state for keyed
367 GRE tunnels with enabled checksum. Tell them "thank you".
368
369 Well, I wonder, rfc1812 was written by Cisco employee,
370 what the hell these idiots break standrads established
371 by themself???
372 */
373
6ed2533e 374 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 375 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 376 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
377 const int type = icmp_hdr(skb)->type;
378 const int code = icmp_hdr(skb)->code;
1da177e4 379 struct ip_tunnel *t;
d5a0a1e3 380 __be16 flags;
1da177e4
LT
381
382 flags = p[0];
383 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
384 if (flags&(GRE_VERSION|GRE_ROUTING))
385 return;
386 if (flags&GRE_KEY) {
387 grehlen += 4;
388 if (flags&GRE_CSUM)
389 grehlen += 4;
390 }
391 }
392
393 /* If only 8 bytes returned, keyed message will be dropped here */
394 if (skb_headlen(skb) < grehlen)
395 return;
396
397 switch (type) {
398 default:
399 case ICMP_PARAMETERPROB:
400 return;
401
402 case ICMP_DEST_UNREACH:
403 switch (code) {
404 case ICMP_SR_FAILED:
405 case ICMP_PORT_UNREACH:
406 /* Impossible event. */
407 return;
408 case ICMP_FRAG_NEEDED:
409 /* Soft state for pmtu is maintained by IP core. */
410 return;
411 default:
412 /* All others are translated to HOST_UNREACH.
413 rfc2003 contains "deep thoughts" about NET_UNREACH,
414 I believe they are just ether pollution. --ANK
415 */
416 break;
417 }
418 break;
419 case ICMP_TIME_EXCEEDED:
420 if (code != ICMP_EXC_TTL)
421 return;
422 break;
423 }
424
425 read_lock(&ipgre_lock);
3b4667f3 426 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
e1a80002
HX
427 flags & GRE_KEY ?
428 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
429 p[1]);
f97c1e0c
JP
430 if (t == NULL || t->parms.iph.daddr == 0 ||
431 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
432 goto out;
433
434 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
435 goto out;
436
437 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
438 t->err_count++;
439 else
440 t->err_count = 1;
441 t->err_time = jiffies;
442out:
443 read_unlock(&ipgre_lock);
444 return;
1da177e4
LT
445}
446
447static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
448{
449 if (INET_ECN_is_ce(iph->tos)) {
450 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 451 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 452 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 453 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
454 }
455 }
456}
457
458static inline u8
459ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
460{
461 u8 inner = 0;
462 if (skb->protocol == htons(ETH_P_IP))
463 inner = old_iph->tos;
464 else if (skb->protocol == htons(ETH_P_IPV6))
465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
466 return INET_ECN_encapsulate(tos, inner);
467}
468
469static int ipgre_rcv(struct sk_buff *skb)
470{
471 struct iphdr *iph;
472 u8 *h;
d5a0a1e3 473 __be16 flags;
d3bc23e7 474 __sum16 csum = 0;
d5a0a1e3 475 __be32 key = 0;
1da177e4
LT
476 u32 seqno = 0;
477 struct ip_tunnel *tunnel;
478 int offset = 4;
e1a80002 479 __be16 gre_proto;
64194c31 480 unsigned int len;
1da177e4
LT
481
482 if (!pskb_may_pull(skb, 16))
483 goto drop_nolock;
484
eddc9ec5 485 iph = ip_hdr(skb);
1da177e4 486 h = skb->data;
d5a0a1e3 487 flags = *(__be16*)h;
1da177e4
LT
488
489 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
490 /* - Version must be 0.
491 - We do not support routing headers.
492 */
493 if (flags&(GRE_VERSION|GRE_ROUTING))
494 goto drop_nolock;
495
496 if (flags&GRE_CSUM) {
fb286bb2 497 switch (skb->ip_summed) {
84fa7933 498 case CHECKSUM_COMPLETE:
d3bc23e7 499 csum = csum_fold(skb->csum);
fb286bb2
HX
500 if (!csum)
501 break;
502 /* fall through */
503 case CHECKSUM_NONE:
504 skb->csum = 0;
505 csum = __skb_checksum_complete(skb);
84fa7933 506 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
507 }
508 offset += 4;
509 }
510 if (flags&GRE_KEY) {
d5a0a1e3 511 key = *(__be32*)(h + offset);
1da177e4
LT
512 offset += 4;
513 }
514 if (flags&GRE_SEQ) {
d5a0a1e3 515 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
516 offset += 4;
517 }
518 }
519
e1a80002
HX
520 gre_proto = *(__be16 *)(h + 2);
521
1da177e4 522 read_lock(&ipgre_lock);
3b4667f3 523 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
e1a80002
HX
524 iph->saddr, iph->daddr, key,
525 gre_proto))) {
addd68eb
PE
526 struct net_device_stats *stats = &tunnel->dev->stats;
527
1da177e4
LT
528 secpath_reset(skb);
529
e1a80002 530 skb->protocol = gre_proto;
1da177e4
LT
531 /* WCCP version 1 and 2 protocol decoding.
532 * - Change protocol to IP
533 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
534 */
e1a80002 535 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 536 skb->protocol = htons(ETH_P_IP);
e905a9ed 537 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
538 offset += 4;
539 }
540
1d069167 541 skb->mac_header = skb->network_header;
4209fb60 542 __pskb_pull(skb, offset);
9c70220b 543 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
544 skb->pkt_type = PACKET_HOST;
545#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 546 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 547 /* Looped back packet, drop it! */
ee6b9673 548 if (skb->rtable->fl.iif == 0)
1da177e4 549 goto drop;
addd68eb 550 stats->multicast++;
1da177e4
LT
551 skb->pkt_type = PACKET_BROADCAST;
552 }
553#endif
554
555 if (((flags&GRE_CSUM) && csum) ||
556 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
557 stats->rx_crc_errors++;
558 stats->rx_errors++;
1da177e4
LT
559 goto drop;
560 }
561 if (tunnel->parms.i_flags&GRE_SEQ) {
562 if (!(flags&GRE_SEQ) ||
563 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
564 stats->rx_fifo_errors++;
565 stats->rx_errors++;
1da177e4
LT
566 goto drop;
567 }
568 tunnel->i_seqno = seqno + 1;
569 }
e1a80002 570
64194c31
HX
571 len = skb->len;
572
e1a80002
HX
573 /* Warning: All skb pointers will be invalidated! */
574 if (tunnel->dev->type == ARPHRD_ETHER) {
575 if (!pskb_may_pull(skb, ETH_HLEN)) {
576 stats->rx_length_errors++;
577 stats->rx_errors++;
578 goto drop;
579 }
580
581 iph = ip_hdr(skb);
582 skb->protocol = eth_type_trans(skb, tunnel->dev);
583 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
584 }
585
addd68eb 586 stats->rx_packets++;
64194c31 587 stats->rx_bytes += len;
1da177e4
LT
588 skb->dev = tunnel->dev;
589 dst_release(skb->dst);
590 skb->dst = NULL;
591 nf_reset(skb);
e1a80002
HX
592
593 skb_reset_network_header(skb);
1da177e4 594 ipgre_ecn_decapsulate(iph, skb);
e1a80002 595
1da177e4
LT
596 netif_rx(skb);
597 read_unlock(&ipgre_lock);
598 return(0);
599 }
45af08be 600 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
601
602drop:
603 read_unlock(&ipgre_lock);
604drop_nolock:
605 kfree_skb(skb);
606 return(0);
607}
608
609static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
610{
2941a486 611 struct ip_tunnel *tunnel = netdev_priv(dev);
addd68eb 612 struct net_device_stats *stats = &tunnel->dev->stats;
eddc9ec5 613 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
614 struct iphdr *tiph;
615 u8 tos;
d5a0a1e3 616 __be16 df;
1da177e4
LT
617 struct rtable *rt; /* Route to the other host */
618 struct net_device *tdev; /* Device to other host */
619 struct iphdr *iph; /* Our new IP header */
c2636b4d 620 unsigned int max_headroom; /* The extra header space needed */
1da177e4 621 int gre_hlen;
d5a0a1e3 622 __be32 dst;
1da177e4
LT
623 int mtu;
624
625 if (tunnel->recursion++) {
addd68eb 626 stats->collisions++;
1da177e4
LT
627 goto tx_error;
628 }
629
e1a80002
HX
630 if (dev->type == ARPHRD_ETHER)
631 IPCB(skb)->flags = 0;
632
633 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 634 gre_hlen = 0;
6ed2533e 635 tiph = (struct iphdr *)skb->data;
1da177e4
LT
636 } else {
637 gre_hlen = tunnel->hlen;
638 tiph = &tunnel->parms.iph;
639 }
640
641 if ((dst = tiph->daddr) == 0) {
642 /* NBMA tunnel */
643
644 if (skb->dst == NULL) {
addd68eb 645 stats->tx_fifo_errors++;
1da177e4
LT
646 goto tx_error;
647 }
648
649 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 650 rt = skb->rtable;
1da177e4
LT
651 if ((dst = rt->rt_gateway) == 0)
652 goto tx_error_icmp;
653 }
654#ifdef CONFIG_IPV6
655 else if (skb->protocol == htons(ETH_P_IPV6)) {
656 struct in6_addr *addr6;
657 int addr_type;
658 struct neighbour *neigh = skb->dst->neighbour;
659
660 if (neigh == NULL)
661 goto tx_error;
662
6ed2533e 663 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
664 addr_type = ipv6_addr_type(addr6);
665
666 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 667 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
668 addr_type = ipv6_addr_type(addr6);
669 }
670
671 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
672 goto tx_error_icmp;
673
674 dst = addr6->s6_addr32[3];
675 }
676#endif
677 else
678 goto tx_error;
679 }
680
681 tos = tiph->tos;
682 if (tos&1) {
683 if (skb->protocol == htons(ETH_P_IP))
684 tos = old_iph->tos;
685 tos &= ~1;
686 }
687
688 {
689 struct flowi fl = { .oif = tunnel->parms.link,
690 .nl_u = { .ip4_u =
691 { .daddr = dst,
692 .saddr = tiph->saddr,
693 .tos = RT_TOS(tos) } },
694 .proto = IPPROTO_GRE };
96635522 695 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 696 stats->tx_carrier_errors++;
1da177e4
LT
697 goto tx_error;
698 }
699 }
700 tdev = rt->u.dst.dev;
701
702 if (tdev == dev) {
703 ip_rt_put(rt);
addd68eb 704 stats->collisions++;
1da177e4
LT
705 goto tx_error;
706 }
707
708 df = tiph->frag_off;
709 if (df)
c95b819a 710 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4
LT
711 else
712 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
713
714 if (skb->dst)
715 skb->dst->ops->update_pmtu(skb->dst, mtu);
716
717 if (skb->protocol == htons(ETH_P_IP)) {
718 df |= (old_iph->frag_off&htons(IP_DF));
719
720 if ((old_iph->frag_off&htons(IP_DF)) &&
721 mtu < ntohs(old_iph->tot_len)) {
722 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
723 ip_rt_put(rt);
724 goto tx_error;
725 }
726 }
727#ifdef CONFIG_IPV6
728 else if (skb->protocol == htons(ETH_P_IPV6)) {
6ed2533e 729 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
1da177e4
LT
730
731 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
732 if ((tunnel->parms.iph.daddr &&
733 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
734 rt6->rt6i_dst.plen == 128) {
735 rt6->rt6i_flags |= RTF_MODIFIED;
736 skb->dst->metrics[RTAX_MTU-1] = mtu;
737 }
738 }
739
740 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
741 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
742 ip_rt_put(rt);
743 goto tx_error;
744 }
745 }
746#endif
747
748 if (tunnel->err_count > 0) {
749 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
750 tunnel->err_count--;
751
752 dst_link_failure(skb);
753 } else
754 tunnel->err_count = 0;
755 }
756
757 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
758
cfbba49d
PM
759 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
760 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
761 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
762 if (!new_skb) {
763 ip_rt_put(rt);
e905a9ed 764 stats->tx_dropped++;
1da177e4
LT
765 dev_kfree_skb(skb);
766 tunnel->recursion--;
767 return 0;
768 }
769 if (skb->sk)
770 skb_set_owner_w(new_skb, skb->sk);
771 dev_kfree_skb(skb);
772 skb = new_skb;
eddc9ec5 773 old_iph = ip_hdr(skb);
1da177e4
LT
774 }
775
64194c31 776 skb_reset_transport_header(skb);
e2d1bca7
ACM
777 skb_push(skb, gre_hlen);
778 skb_reset_network_header(skb);
1da177e4 779 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
780 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
781 IPSKB_REROUTED);
1da177e4
LT
782 dst_release(skb->dst);
783 skb->dst = &rt->u.dst;
784
785 /*
786 * Push down and install the IPIP header.
787 */
788
eddc9ec5 789 iph = ip_hdr(skb);
1da177e4
LT
790 iph->version = 4;
791 iph->ihl = sizeof(struct iphdr) >> 2;
792 iph->frag_off = df;
793 iph->protocol = IPPROTO_GRE;
794 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
795 iph->daddr = rt->rt_dst;
796 iph->saddr = rt->rt_src;
797
798 if ((iph->ttl = tiph->ttl) == 0) {
799 if (skb->protocol == htons(ETH_P_IP))
800 iph->ttl = old_iph->ttl;
801#ifdef CONFIG_IPV6
802 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 803 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
804#endif
805 else
806 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
807 }
808
e1a80002
HX
809 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
810 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
811 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
812
813 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 814 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
815
816 if (tunnel->parms.o_flags&GRE_SEQ) {
817 ++tunnel->o_seqno;
818 *ptr = htonl(tunnel->o_seqno);
819 ptr--;
820 }
821 if (tunnel->parms.o_flags&GRE_KEY) {
822 *ptr = tunnel->parms.o_key;
823 ptr--;
824 }
825 if (tunnel->parms.o_flags&GRE_CSUM) {
826 *ptr = 0;
5f92a738 827 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
828 }
829 }
830
831 nf_reset(skb);
832
833 IPTUNNEL_XMIT();
834 tunnel->recursion--;
835 return 0;
836
837tx_error_icmp:
838 dst_link_failure(skb);
839
840tx_error:
841 stats->tx_errors++;
842 dev_kfree_skb(skb);
843 tunnel->recursion--;
844 return 0;
845}
846
42aa9162 847static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
848{
849 struct net_device *tdev = NULL;
850 struct ip_tunnel *tunnel;
851 struct iphdr *iph;
852 int hlen = LL_MAX_HEADER;
853 int mtu = ETH_DATA_LEN;
854 int addend = sizeof(struct iphdr) + 4;
855
856 tunnel = netdev_priv(dev);
857 iph = &tunnel->parms.iph;
858
c95b819a 859 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
860
861 if (iph->daddr) {
862 struct flowi fl = { .oif = tunnel->parms.link,
863 .nl_u = { .ip4_u =
864 { .daddr = iph->daddr,
865 .saddr = iph->saddr,
866 .tos = RT_TOS(iph->tos) } },
867 .proto = IPPROTO_GRE };
868 struct rtable *rt;
96635522 869 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
870 tdev = rt->u.dst.dev;
871 ip_rt_put(rt);
872 }
e1a80002
HX
873
874 if (dev->type != ARPHRD_ETHER)
875 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
876 }
877
878 if (!tdev && tunnel->parms.link)
96635522 879 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
880
881 if (tdev) {
c95b819a 882 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
883 mtu = tdev->mtu;
884 }
885 dev->iflink = tunnel->parms.link;
886
887 /* Precalculate GRE options length */
888 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
889 if (tunnel->parms.o_flags&GRE_CSUM)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_KEY)
892 addend += 4;
893 if (tunnel->parms.o_flags&GRE_SEQ)
894 addend += 4;
895 }
c95b819a 896 dev->needed_headroom = addend + hlen;
42aa9162
HX
897 mtu -= dev->hard_header_len - addend;
898
899 if (mtu < 68)
900 mtu = 68;
901
ee34c1eb
MS
902 tunnel->hlen = addend;
903
42aa9162 904 return mtu;
ee34c1eb
MS
905}
906
1da177e4
LT
907static int
908ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
909{
910 int err = 0;
911 struct ip_tunnel_parm p;
912 struct ip_tunnel *t;
f57e7d5a
PE
913 struct net *net = dev_net(dev);
914 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
915
916 switch (cmd) {
917 case SIOCGETTUNNEL:
918 t = NULL;
7daa0004 919 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
920 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
921 err = -EFAULT;
922 break;
923 }
f57e7d5a 924 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
925 }
926 if (t == NULL)
2941a486 927 t = netdev_priv(dev);
1da177e4
LT
928 memcpy(&p, &t->parms, sizeof(p));
929 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
930 err = -EFAULT;
931 break;
932
933 case SIOCADDTUNNEL:
934 case SIOCCHGTUNNEL:
935 err = -EPERM;
936 if (!capable(CAP_NET_ADMIN))
937 goto done;
938
939 err = -EFAULT;
940 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
941 goto done;
942
943 err = -EINVAL;
944 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
945 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
946 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
947 goto done;
948 if (p.iph.ttl)
949 p.iph.frag_off |= htons(IP_DF);
950
951 if (!(p.i_flags&GRE_KEY))
952 p.i_key = 0;
953 if (!(p.o_flags&GRE_KEY))
954 p.o_key = 0;
955
f57e7d5a 956 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 957
7daa0004 958 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
959 if (t != NULL) {
960 if (t->dev != dev) {
961 err = -EEXIST;
962 break;
963 }
964 } else {
6ed2533e 965 unsigned nflags = 0;
1da177e4 966
2941a486 967 t = netdev_priv(dev);
1da177e4 968
f97c1e0c 969 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
970 nflags = IFF_BROADCAST;
971 else if (p.iph.daddr)
972 nflags = IFF_POINTOPOINT;
973
974 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
975 err = -EINVAL;
976 break;
977 }
f57e7d5a 978 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
979 t->parms.iph.saddr = p.iph.saddr;
980 t->parms.iph.daddr = p.iph.daddr;
981 t->parms.i_key = p.i_key;
982 t->parms.o_key = p.o_key;
983 memcpy(dev->dev_addr, &p.iph.saddr, 4);
984 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 985 ipgre_tunnel_link(ign, t);
1da177e4
LT
986 netdev_state_change(dev);
987 }
988 }
989
990 if (t) {
991 err = 0;
992 if (cmd == SIOCCHGTUNNEL) {
993 t->parms.iph.ttl = p.iph.ttl;
994 t->parms.iph.tos = p.iph.tos;
995 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
996 if (t->parms.link != p.link) {
997 t->parms.link = p.link;
42aa9162 998 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
999 netdev_state_change(dev);
1000 }
1da177e4
LT
1001 }
1002 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1003 err = -EFAULT;
1004 } else
1005 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1006 break;
1007
1008 case SIOCDELTUNNEL:
1009 err = -EPERM;
1010 if (!capable(CAP_NET_ADMIN))
1011 goto done;
1012
7daa0004 1013 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1014 err = -EFAULT;
1015 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1016 goto done;
1017 err = -ENOENT;
f57e7d5a 1018 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1019 goto done;
1020 err = -EPERM;
7daa0004 1021 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1022 goto done;
1023 dev = t->dev;
1024 }
22f8cde5
SH
1025 unregister_netdevice(dev);
1026 err = 0;
1da177e4
LT
1027 break;
1028
1029 default:
1030 err = -EINVAL;
1031 }
1032
1033done:
1034 return err;
1035}
1036
1da177e4
LT
1037static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1038{
2941a486 1039 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1040 if (new_mtu < 68 ||
1041 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1042 return -EINVAL;
1043 dev->mtu = new_mtu;
1044 return 0;
1045}
1046
1da177e4
LT
1047/* Nice toy. Unfortunately, useless in real life :-)
1048 It allows to construct virtual multiprotocol broadcast "LAN"
1049 over the Internet, provided multicast routing is tuned.
1050
1051
1052 I have no idea was this bicycle invented before me,
1053 so that I had to set ARPHRD_IPGRE to a random value.
1054 I have an impression, that Cisco could make something similar,
1055 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1056
1da177e4
LT
1057 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1058 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1059
1060 ping -t 255 224.66.66.66
1061
1062 If nobody answers, mbone does not work.
1063
1064 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1065 ip addr add 10.66.66.<somewhat>/24 dev Universe
1066 ifconfig Universe up
1067 ifconfig Universe add fe80::<Your_real_addr>/10
1068 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1069 ftp 10.66.66.66
1070 ...
1071 ftp fec0:6666:6666::193.233.7.65
1072 ...
1073
1074 */
1075
3b04ddde
SH
1076static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1077 unsigned short type,
1078 const void *daddr, const void *saddr, unsigned len)
1da177e4 1079{
2941a486 1080 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1081 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1082 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1083
1084 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1085 p[0] = t->parms.o_flags;
1086 p[1] = htons(type);
1087
1088 /*
e905a9ed 1089 * Set the source hardware address.
1da177e4 1090 */
e905a9ed 1091
1da177e4
LT
1092 if (saddr)
1093 memcpy(&iph->saddr, saddr, 4);
1094
1095 if (daddr) {
1096 memcpy(&iph->daddr, daddr, 4);
1097 return t->hlen;
1098 }
f97c1e0c 1099 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1100 return t->hlen;
e905a9ed 1101
1da177e4
LT
1102 return -t->hlen;
1103}
1104
6a5f44d7
TT
1105static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1106{
6ed2533e 1107 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1108 memcpy(haddr, &iph->saddr, 4);
1109 return 4;
1110}
1111
3b04ddde
SH
1112static const struct header_ops ipgre_header_ops = {
1113 .create = ipgre_header,
6a5f44d7 1114 .parse = ipgre_header_parse,
3b04ddde
SH
1115};
1116
6a5f44d7 1117#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1118static int ipgre_open(struct net_device *dev)
1119{
2941a486 1120 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1121
f97c1e0c 1122 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1123 struct flowi fl = { .oif = t->parms.link,
1124 .nl_u = { .ip4_u =
1125 { .daddr = t->parms.iph.daddr,
1126 .saddr = t->parms.iph.saddr,
1127 .tos = RT_TOS(t->parms.iph.tos) } },
1128 .proto = IPPROTO_GRE };
1129 struct rtable *rt;
96635522 1130 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1131 return -EADDRNOTAVAIL;
1132 dev = rt->u.dst.dev;
1133 ip_rt_put(rt);
e5ed6399 1134 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1135 return -EADDRNOTAVAIL;
1136 t->mlink = dev->ifindex;
e5ed6399 1137 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1138 }
1139 return 0;
1140}
1141
1142static int ipgre_close(struct net_device *dev)
1143{
2941a486 1144 struct ip_tunnel *t = netdev_priv(dev);
f97c1e0c 1145 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1146 struct in_device *in_dev;
c346dca1 1147 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1148 if (in_dev) {
1149 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1150 in_dev_put(in_dev);
1151 }
1152 }
1153 return 0;
1154}
1155
1156#endif
1157
1158static void ipgre_tunnel_setup(struct net_device *dev)
1159{
c19e654d 1160 dev->init = ipgre_tunnel_init;
1da177e4
LT
1161 dev->uninit = ipgre_tunnel_uninit;
1162 dev->destructor = free_netdev;
1163 dev->hard_start_xmit = ipgre_tunnel_xmit;
1da177e4
LT
1164 dev->do_ioctl = ipgre_tunnel_ioctl;
1165 dev->change_mtu = ipgre_tunnel_change_mtu;
1166
1167 dev->type = ARPHRD_IPGRE;
c95b819a 1168 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1169 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1170 dev->flags = IFF_NOARP;
1171 dev->iflink = 0;
1172 dev->addr_len = 4;
0b67eceb 1173 dev->features |= NETIF_F_NETNS_LOCAL;
1da177e4
LT
1174}
1175
1176static int ipgre_tunnel_init(struct net_device *dev)
1177{
1da177e4
LT
1178 struct ip_tunnel *tunnel;
1179 struct iphdr *iph;
1da177e4 1180
2941a486 1181 tunnel = netdev_priv(dev);
1da177e4
LT
1182 iph = &tunnel->parms.iph;
1183
1184 tunnel->dev = dev;
1185 strcpy(tunnel->parms.name, dev->name);
1186
1187 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1188 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1189
1da177e4 1190 if (iph->daddr) {
1da177e4 1191#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1192 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1193 if (!iph->saddr)
1194 return -EINVAL;
1195 dev->flags = IFF_BROADCAST;
3b04ddde 1196 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1197 dev->open = ipgre_open;
1198 dev->stop = ipgre_close;
1199 }
1200#endif
ee34c1eb 1201 } else
6a5f44d7 1202 dev->header_ops = &ipgre_header_ops;
1da177e4 1203
1da177e4
LT
1204 return 0;
1205}
1206
7daa0004 1207static int ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1208{
2941a486 1209 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1210 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1211 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1212
1213 tunnel->dev = dev;
1214 strcpy(tunnel->parms.name, dev->name);
1215
1216 iph->version = 4;
1217 iph->protocol = IPPROTO_GRE;
1218 iph->ihl = 5;
1219 tunnel->hlen = sizeof(struct iphdr) + 4;
1220
1221 dev_hold(dev);
eb8ce741 1222 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1223 return 0;
1224}
1225
1226
1227static struct net_protocol ipgre_protocol = {
1228 .handler = ipgre_rcv,
1229 .err_handler = ipgre_err,
f96c148f 1230 .netns_ok = 1,
1da177e4
LT
1231};
1232
eb8ce741
PE
1233static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1234{
1235 int prio;
1236
1237 for (prio = 0; prio < 4; prio++) {
1238 int h;
1239 for (h = 0; h < HASH_SIZE; h++) {
1240 struct ip_tunnel *t;
1241 while ((t = ign->tunnels[prio][h]) != NULL)
1242 unregister_netdevice(t->dev);
1243 }
1244 }
1245}
1246
59a4c759
PE
1247static int ipgre_init_net(struct net *net)
1248{
1249 int err;
1250 struct ipgre_net *ign;
1251
1252 err = -ENOMEM;
eb8ce741 1253 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1254 if (ign == NULL)
1255 goto err_alloc;
1256
1257 err = net_assign_generic(net, ipgre_net_id, ign);
1258 if (err < 0)
1259 goto err_assign;
1260
7daa0004
PE
1261 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1262 ipgre_tunnel_setup);
1263 if (!ign->fb_tunnel_dev) {
1264 err = -ENOMEM;
1265 goto err_alloc_dev;
1266 }
1267
1268 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1269 dev_net_set(ign->fb_tunnel_dev, net);
c19e654d 1270 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1271
1272 if ((err = register_netdev(ign->fb_tunnel_dev)))
1273 goto err_reg_dev;
1274
59a4c759
PE
1275 return 0;
1276
7daa0004
PE
1277err_reg_dev:
1278 free_netdev(ign->fb_tunnel_dev);
1279err_alloc_dev:
1280 /* nothing */
59a4c759
PE
1281err_assign:
1282 kfree(ign);
1283err_alloc:
1284 return err;
1285}
1286
1287static void ipgre_exit_net(struct net *net)
1288{
1289 struct ipgre_net *ign;
1290
1291 ign = net_generic(net, ipgre_net_id);
7daa0004 1292 rtnl_lock();
eb8ce741 1293 ipgre_destroy_tunnels(ign);
7daa0004 1294 rtnl_unlock();
59a4c759
PE
1295 kfree(ign);
1296}
1297
1298static struct pernet_operations ipgre_net_ops = {
1299 .init = ipgre_init_net,
1300 .exit = ipgre_exit_net,
1301};
1da177e4 1302
c19e654d
HX
1303static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1304{
1305 __be16 flags;
1306
1307 if (!data)
1308 return 0;
1309
1310 flags = 0;
1311 if (data[IFLA_GRE_IFLAGS])
1312 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1313 if (data[IFLA_GRE_OFLAGS])
1314 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1315 if (flags & (GRE_VERSION|GRE_ROUTING))
1316 return -EINVAL;
1317
1318 return 0;
1319}
1320
e1a80002
HX
1321static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1322{
1323 __be32 daddr;
1324
1325 if (tb[IFLA_ADDRESS]) {
1326 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1327 return -EINVAL;
1328 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1329 return -EADDRNOTAVAIL;
1330 }
1331
1332 if (!data)
1333 goto out;
1334
1335 if (data[IFLA_GRE_REMOTE]) {
1336 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1337 if (!daddr)
1338 return -EINVAL;
1339 }
1340
1341out:
1342 return ipgre_tunnel_validate(tb, data);
1343}
1344
c19e654d
HX
1345static void ipgre_netlink_parms(struct nlattr *data[],
1346 struct ip_tunnel_parm *parms)
1347{
7bb82d92 1348 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1349
1350 parms->iph.protocol = IPPROTO_GRE;
1351
1352 if (!data)
1353 return;
1354
1355 if (data[IFLA_GRE_LINK])
1356 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1357
1358 if (data[IFLA_GRE_IFLAGS])
1359 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1360
1361 if (data[IFLA_GRE_OFLAGS])
1362 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1363
1364 if (data[IFLA_GRE_IKEY])
1365 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1366
1367 if (data[IFLA_GRE_OKEY])
1368 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1369
1370 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1371 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1372
1373 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1374 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1375
1376 if (data[IFLA_GRE_TTL])
1377 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1378
1379 if (data[IFLA_GRE_TOS])
1380 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1381
1382 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1383 parms->iph.frag_off = htons(IP_DF);
1384}
1385
e1a80002
HX
1386static int ipgre_tap_init(struct net_device *dev)
1387{
1388 struct ip_tunnel *tunnel;
1389
1390 tunnel = netdev_priv(dev);
1391
1392 tunnel->dev = dev;
1393 strcpy(tunnel->parms.name, dev->name);
1394
1395 ipgre_tunnel_bind_dev(dev);
1396
1397 return 0;
1398}
1399
1400static void ipgre_tap_setup(struct net_device *dev)
1401{
1402
1403 ether_setup(dev);
1404
1405 dev->init = ipgre_tap_init;
1406 dev->uninit = ipgre_tunnel_uninit;
1407 dev->destructor = free_netdev;
1408 dev->hard_start_xmit = ipgre_tunnel_xmit;
1409 dev->change_mtu = ipgre_tunnel_change_mtu;
1410
1411 dev->iflink = 0;
1412 dev->features |= NETIF_F_NETNS_LOCAL;
1413}
1414
c19e654d
HX
1415static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1416 struct nlattr *data[])
1417{
1418 struct ip_tunnel *nt;
1419 struct net *net = dev_net(dev);
1420 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1421 int mtu;
1422 int err;
1423
1424 nt = netdev_priv(dev);
1425 ipgre_netlink_parms(data, &nt->parms);
1426
e1a80002 1427 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1428 return -EEXIST;
1429
e1a80002
HX
1430 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1431 random_ether_addr(dev->dev_addr);
1432
c19e654d
HX
1433 mtu = ipgre_tunnel_bind_dev(dev);
1434 if (!tb[IFLA_MTU])
1435 dev->mtu = mtu;
1436
1437 err = register_netdevice(dev);
1438 if (err)
1439 goto out;
1440
1441 dev_hold(dev);
1442 ipgre_tunnel_link(ign, nt);
1443
1444out:
1445 return err;
1446}
1447
1448static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1449 struct nlattr *data[])
1450{
1451 struct ip_tunnel *t, *nt;
1452 struct net *net = dev_net(dev);
1453 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1454 struct ip_tunnel_parm p;
1455 int mtu;
1456
1457 if (dev == ign->fb_tunnel_dev)
1458 return -EINVAL;
1459
1460 nt = netdev_priv(dev);
1461 ipgre_netlink_parms(data, &p);
1462
1463 t = ipgre_tunnel_locate(net, &p, 0);
1464
1465 if (t) {
1466 if (t->dev != dev)
1467 return -EEXIST;
1468 } else {
1469 unsigned nflags = 0;
1470
1471 t = nt;
1472
1473 if (ipv4_is_multicast(p.iph.daddr))
1474 nflags = IFF_BROADCAST;
1475 else if (p.iph.daddr)
1476 nflags = IFF_POINTOPOINT;
1477
1478 if ((dev->flags ^ nflags) &
1479 (IFF_POINTOPOINT | IFF_BROADCAST))
1480 return -EINVAL;
1481
1482 ipgre_tunnel_unlink(ign, t);
1483 t->parms.iph.saddr = p.iph.saddr;
1484 t->parms.iph.daddr = p.iph.daddr;
1485 t->parms.i_key = p.i_key;
1486 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1487 memcpy(dev->broadcast, &p.iph.daddr, 4);
1488 ipgre_tunnel_link(ign, t);
1489 netdev_state_change(dev);
1490 }
1491
1492 t->parms.o_key = p.o_key;
1493 t->parms.iph.ttl = p.iph.ttl;
1494 t->parms.iph.tos = p.iph.tos;
1495 t->parms.iph.frag_off = p.iph.frag_off;
1496
1497 if (t->parms.link != p.link) {
1498 t->parms.link = p.link;
1499 mtu = ipgre_tunnel_bind_dev(dev);
1500 if (!tb[IFLA_MTU])
1501 dev->mtu = mtu;
1502 netdev_state_change(dev);
1503 }
1504
1505 return 0;
1506}
1507
1508static size_t ipgre_get_size(const struct net_device *dev)
1509{
1510 return
1511 /* IFLA_GRE_LINK */
1512 nla_total_size(4) +
1513 /* IFLA_GRE_IFLAGS */
1514 nla_total_size(2) +
1515 /* IFLA_GRE_OFLAGS */
1516 nla_total_size(2) +
1517 /* IFLA_GRE_IKEY */
1518 nla_total_size(4) +
1519 /* IFLA_GRE_OKEY */
1520 nla_total_size(4) +
1521 /* IFLA_GRE_LOCAL */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_REMOTE */
1524 nla_total_size(4) +
1525 /* IFLA_GRE_TTL */
1526 nla_total_size(1) +
1527 /* IFLA_GRE_TOS */
1528 nla_total_size(1) +
1529 /* IFLA_GRE_PMTUDISC */
1530 nla_total_size(1) +
1531 0;
1532}
1533
1534static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1535{
1536 struct ip_tunnel *t = netdev_priv(dev);
1537 struct ip_tunnel_parm *p = &t->parms;
1538
1539 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1540 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1541 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1542 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1543 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1544 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1545 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1546 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1547 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1548 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1549
1550 return 0;
1551
1552nla_put_failure:
1553 return -EMSGSIZE;
1554}
1555
1556static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1557 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1558 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1559 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1560 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1561 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1562 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1563 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1564 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1565 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1566 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1567};
1568
1569static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1570 .kind = "gre",
1571 .maxtype = IFLA_GRE_MAX,
1572 .policy = ipgre_policy,
1573 .priv_size = sizeof(struct ip_tunnel),
1574 .setup = ipgre_tunnel_setup,
1575 .validate = ipgre_tunnel_validate,
1576 .newlink = ipgre_newlink,
1577 .changelink = ipgre_changelink,
1578 .get_size = ipgre_get_size,
1579 .fill_info = ipgre_fill_info,
1580};
1581
e1a80002
HX
1582static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1583 .kind = "gretap",
1584 .maxtype = IFLA_GRE_MAX,
1585 .policy = ipgre_policy,
1586 .priv_size = sizeof(struct ip_tunnel),
1587 .setup = ipgre_tap_setup,
1588 .validate = ipgre_tap_validate,
1589 .newlink = ipgre_newlink,
1590 .changelink = ipgre_changelink,
1591 .get_size = ipgre_get_size,
1592 .fill_info = ipgre_fill_info,
1593};
1594
1da177e4
LT
1595/*
1596 * And now the modules code and kernel interface.
1597 */
1598
1599static int __init ipgre_init(void)
1600{
1601 int err;
1602
1603 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1604
1605 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1606 printk(KERN_INFO "ipgre init: can't add protocol\n");
1607 return -EAGAIN;
1608 }
1609
59a4c759
PE
1610 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1611 if (err < 0)
c19e654d 1612 goto gen_device_failed;
7daa0004 1613
c19e654d
HX
1614 err = rtnl_link_register(&ipgre_link_ops);
1615 if (err < 0)
1616 goto rtnl_link_failed;
1617
e1a80002
HX
1618 err = rtnl_link_register(&ipgre_tap_ops);
1619 if (err < 0)
1620 goto tap_ops_failed;
1621
c19e654d 1622out:
1da177e4 1623 return err;
c19e654d 1624
e1a80002
HX
1625tap_ops_failed:
1626 rtnl_link_unregister(&ipgre_link_ops);
c19e654d
HX
1627rtnl_link_failed:
1628 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1629gen_device_failed:
1630 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1631 goto out;
1da177e4
LT
1632}
1633
db44575f 1634static void __exit ipgre_fini(void)
1da177e4 1635{
e1a80002 1636 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d
HX
1637 rtnl_link_unregister(&ipgre_link_ops);
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1639 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1640 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1da177e4
LT
1641}
1642
1643module_init(ipgre_init);
1644module_exit(ipgre_fini);
1645MODULE_LICENSE("GPL");
4d74f8ba
PM
1646MODULE_ALIAS_RTNL_LINK("gre");
1647MODULE_ALIAS_RTNL_LINK("gretap");