]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
phylib: unsigneds go unnoticed
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
e1a80002 30#include <linux/etherdevice.h>
46f25dff 31#include <linux/if_ether.h>
1da177e4
LT
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
59a4c759
PE
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
c19e654d 45#include <net/rtnetlink.h>
1da177e4
LT
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
e905a9ed 69 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would be even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
c19e654d 122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 125static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
eb8ce741
PE
129#define HASH_SIZE 16
130
59a4c759
PE
131static int ipgre_net_id;
132struct ipgre_net {
eb8ce741
PE
133 struct ip_tunnel *tunnels[4][HASH_SIZE];
134
7daa0004 135 struct net_device *fb_tunnel_dev;
59a4c759
PE
136};
137
1da177e4
LT
138/* Tunnel hash table */
139
140/*
141 4 hash tables:
142
143 3: (remote,local)
144 2: (remote,*)
145 1: (*,local)
146 0: (*,*)
147
148 We require exact key match i.e. if a key is present in packet
149 it will match only tunnel with the same key; if it is not present,
150 it will match only keyless tunnel.
151
152 All keyless packets, if not matched configured keyless tunnels
153 will match fallback tunnel.
154 */
155
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15).
 * The argument is parenthesised so that an expression argument is cast
 * and shifted as a whole.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)

/* Named views of the four hash tables inside struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_wc	tunnels[0]	/* (*,*)          */

/*
 * Taken for reading around tunnel lookups in the receive and ICMP error
 * paths, and for writing while a tunnel is linked into or unlinked from
 * a hash chain.
 */
static DEFINE_RWLOCK(ipgre_lock);
164
165/* Given src, dst and key, find appropriate for input tunnel. */
166
f57e7d5a 167static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
e1a80002
HX
168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto)
1da177e4
LT
170{
171 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key);
173 struct ip_tunnel *t;
e1a80002 174 struct ip_tunnel *t2 = NULL;
7daa0004 175 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
e1a80002
HX
176 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
177 ARPHRD_ETHER : ARPHRD_IPGRE;
1da177e4 178
eb8ce741 179 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
1da177e4 180 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
e1a80002
HX
181 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
182 if (t->dev->type == dev_type)
183 return t;
184 if (t->dev->type == ARPHRD_IPGRE && !t2)
185 t2 = t;
186 }
1da177e4
LT
187 }
188 }
e1a80002 189
eb8ce741 190 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
1da177e4 191 if (remote == t->parms.iph.daddr) {
e1a80002
HX
192 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
193 if (t->dev->type == dev_type)
194 return t;
195 if (t->dev->type == ARPHRD_IPGRE && !t2)
196 t2 = t;
197 }
1da177e4
LT
198 }
199 }
e1a80002 200
eb8ce741 201 for (t = ign->tunnels_l[h1]; t; t = t->next) {
1da177e4 202 if (local == t->parms.iph.saddr ||
f97c1e0c
JP
203 (local == t->parms.iph.daddr &&
204 ipv4_is_multicast(local))) {
e1a80002
HX
205 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
206 if (t->dev->type == dev_type)
207 return t;
208 if (t->dev->type == ARPHRD_IPGRE && !t2)
209 t2 = t;
210 }
1da177e4
LT
211 }
212 }
e1a80002 213
eb8ce741 214 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
e1a80002
HX
215 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
216 if (t->dev->type == dev_type)
217 return t;
218 if (t->dev->type == ARPHRD_IPGRE && !t2)
219 t2 = t;
220 }
1da177e4
LT
221 }
222
e1a80002
HX
223 if (t2)
224 return t2;
225
7daa0004
PE
226 if (ign->fb_tunnel_dev->flags&IFF_UP)
227 return netdev_priv(ign->fb_tunnel_dev);
1da177e4
LT
228 return NULL;
229}
230
f57e7d5a
PE
231static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
232 struct ip_tunnel_parm *parms)
1da177e4 233{
5056a1ef
YH
234 __be32 remote = parms->iph.daddr;
235 __be32 local = parms->iph.saddr;
236 __be32 key = parms->i_key;
1da177e4
LT
237 unsigned h = HASH(key);
238 int prio = 0;
239
240 if (local)
241 prio |= 1;
f97c1e0c 242 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
243 prio |= 2;
244 h ^= HASH(remote);
245 }
246
eb8ce741 247 return &ign->tunnels[prio][h];
1da177e4
LT
248}
249
f57e7d5a
PE
250static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
251 struct ip_tunnel *t)
5056a1ef 252{
f57e7d5a 253 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
254}
255
f57e7d5a 256static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 257{
f57e7d5a 258 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
259
260 t->next = *tp;
261 write_lock_bh(&ipgre_lock);
262 *tp = t;
263 write_unlock_bh(&ipgre_lock);
264}
265
f57e7d5a 266static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
267{
268 struct ip_tunnel **tp;
269
f57e7d5a 270 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
271 if (t == *tp) {
272 write_lock_bh(&ipgre_lock);
273 *tp = t->next;
274 write_unlock_bh(&ipgre_lock);
275 break;
276 }
277 }
278}
279
e1a80002
HX
280static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
281 struct ip_tunnel_parm *parms,
282 int type)
1da177e4 283{
d5a0a1e3
AV
284 __be32 remote = parms->iph.daddr;
285 __be32 local = parms->iph.saddr;
286 __be32 key = parms->i_key;
e1a80002
HX
287 struct ip_tunnel *t, **tp;
288 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
289
290 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
291 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr &&
293 key == t->parms.i_key &&
294 type == t->dev->type)
295 break;
296
297 return t;
298}
299
300static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
301 struct ip_tunnel_parm *parms, int create)
302{
303 struct ip_tunnel *t, *nt;
1da177e4 304 struct net_device *dev;
1da177e4 305 char name[IFNAMSIZ];
f57e7d5a 306 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 307
e1a80002
HX
308 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
309 if (t || !create)
310 return t;
1da177e4
LT
311
312 if (parms->name[0])
313 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
314 else
315 sprintf(name, "gre%%d");
1da177e4
LT
316
317 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
318 if (!dev)
319 return NULL;
320
0b67eceb
PE
321 dev_net_set(dev, net);
322
b37d428b
PE
323 if (strchr(name, '%')) {
324 if (dev_alloc_name(dev, name) < 0)
325 goto failed_free;
326 }
327
2941a486 328 nt = netdev_priv(dev);
1da177e4 329 nt->parms = *parms;
c19e654d 330 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 331
42aa9162
HX
332 dev->mtu = ipgre_tunnel_bind_dev(dev);
333
b37d428b
PE
334 if (register_netdevice(dev) < 0)
335 goto failed_free;
1da177e4 336
1da177e4 337 dev_hold(dev);
f57e7d5a 338 ipgre_tunnel_link(ign, nt);
1da177e4
LT
339 return nt;
340
b37d428b
PE
341failed_free:
342 free_netdev(dev);
1da177e4
LT
343 return NULL;
344}
345
346static void ipgre_tunnel_uninit(struct net_device *dev)
347{
f57e7d5a
PE
348 struct net *net = dev_net(dev);
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350
351 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
352 dev_put(dev);
353}
354
355
356static void ipgre_err(struct sk_buff *skb, u32 info)
357{
1da177e4 358
071f92d0 359/* All the routers (except for Linux) return only
1da177e4
LT
360 8 bytes of packet payload. It means, that precise relaying of
361 ICMP in the real Internet is absolutely infeasible.
362
363 Moreover, Cisco "wise men" put GRE key to the third word
364 in GRE header. It makes impossible maintaining even soft state for keyed
365 GRE tunnels with enabled checksum. Tell them "thank you".
366
367 Well, I wonder, rfc1812 was written by Cisco employee,
368 what the hell these idiots break standrads established
369 by themself???
370 */
371
6ed2533e 372 struct iphdr *iph = (struct iphdr *)skb->data;
d5a0a1e3 373 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 374 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
375 const int type = icmp_hdr(skb)->type;
376 const int code = icmp_hdr(skb)->code;
1da177e4 377 struct ip_tunnel *t;
d5a0a1e3 378 __be16 flags;
1da177e4
LT
379
380 flags = p[0];
381 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
382 if (flags&(GRE_VERSION|GRE_ROUTING))
383 return;
384 if (flags&GRE_KEY) {
385 grehlen += 4;
386 if (flags&GRE_CSUM)
387 grehlen += 4;
388 }
389 }
390
391 /* If only 8 bytes returned, keyed message will be dropped here */
392 if (skb_headlen(skb) < grehlen)
393 return;
394
395 switch (type) {
396 default:
397 case ICMP_PARAMETERPROB:
398 return;
399
400 case ICMP_DEST_UNREACH:
401 switch (code) {
402 case ICMP_SR_FAILED:
403 case ICMP_PORT_UNREACH:
404 /* Impossible event. */
405 return;
406 case ICMP_FRAG_NEEDED:
407 /* Soft state for pmtu is maintained by IP core. */
408 return;
409 default:
410 /* All others are translated to HOST_UNREACH.
411 rfc2003 contains "deep thoughts" about NET_UNREACH,
412 I believe they are just ether pollution. --ANK
413 */
414 break;
415 }
416 break;
417 case ICMP_TIME_EXCEEDED:
418 if (code != ICMP_EXC_TTL)
419 return;
420 break;
421 }
422
423 read_lock(&ipgre_lock);
3b4667f3 424 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
e1a80002
HX
425 flags & GRE_KEY ?
426 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
427 p[1]);
f97c1e0c
JP
428 if (t == NULL || t->parms.iph.daddr == 0 ||
429 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
430 goto out;
431
432 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
433 goto out;
434
435 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
436 t->err_count++;
437 else
438 t->err_count = 1;
439 t->err_time = jiffies;
440out:
441 read_unlock(&ipgre_lock);
442 return;
1da177e4
LT
443}
444
445static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
446{
447 if (INET_ECN_is_ce(iph->tos)) {
448 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 449 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 450 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 451 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
452 }
453 }
454}
455
456static inline u8
457ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
458{
459 u8 inner = 0;
460 if (skb->protocol == htons(ETH_P_IP))
461 inner = old_iph->tos;
462 else if (skb->protocol == htons(ETH_P_IPV6))
463 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
464 return INET_ECN_encapsulate(tos, inner);
465}
466
467static int ipgre_rcv(struct sk_buff *skb)
468{
469 struct iphdr *iph;
470 u8 *h;
d5a0a1e3 471 __be16 flags;
d3bc23e7 472 __sum16 csum = 0;
d5a0a1e3 473 __be32 key = 0;
1da177e4
LT
474 u32 seqno = 0;
475 struct ip_tunnel *tunnel;
476 int offset = 4;
e1a80002 477 __be16 gre_proto;
64194c31 478 unsigned int len;
1da177e4
LT
479
480 if (!pskb_may_pull(skb, 16))
481 goto drop_nolock;
482
eddc9ec5 483 iph = ip_hdr(skb);
1da177e4 484 h = skb->data;
d5a0a1e3 485 flags = *(__be16*)h;
1da177e4
LT
486
487 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
488 /* - Version must be 0.
489 - We do not support routing headers.
490 */
491 if (flags&(GRE_VERSION|GRE_ROUTING))
492 goto drop_nolock;
493
494 if (flags&GRE_CSUM) {
fb286bb2 495 switch (skb->ip_summed) {
84fa7933 496 case CHECKSUM_COMPLETE:
d3bc23e7 497 csum = csum_fold(skb->csum);
fb286bb2
HX
498 if (!csum)
499 break;
500 /* fall through */
501 case CHECKSUM_NONE:
502 skb->csum = 0;
503 csum = __skb_checksum_complete(skb);
84fa7933 504 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
505 }
506 offset += 4;
507 }
508 if (flags&GRE_KEY) {
d5a0a1e3 509 key = *(__be32*)(h + offset);
1da177e4
LT
510 offset += 4;
511 }
512 if (flags&GRE_SEQ) {
d5a0a1e3 513 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
514 offset += 4;
515 }
516 }
517
e1a80002
HX
518 gre_proto = *(__be16 *)(h + 2);
519
1da177e4 520 read_lock(&ipgre_lock);
3b4667f3 521 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
e1a80002
HX
522 iph->saddr, iph->daddr, key,
523 gre_proto))) {
addd68eb
PE
524 struct net_device_stats *stats = &tunnel->dev->stats;
525
1da177e4
LT
526 secpath_reset(skb);
527
e1a80002 528 skb->protocol = gre_proto;
1da177e4
LT
529 /* WCCP version 1 and 2 protocol decoding.
530 * - Change protocol to IP
531 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
532 */
e1a80002 533 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
496c98df 534 skb->protocol = htons(ETH_P_IP);
e905a9ed 535 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
536 offset += 4;
537 }
538
1d069167 539 skb->mac_header = skb->network_header;
4209fb60 540 __pskb_pull(skb, offset);
9c70220b 541 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
542 skb->pkt_type = PACKET_HOST;
543#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 544 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 545 /* Looped back packet, drop it! */
ee6b9673 546 if (skb->rtable->fl.iif == 0)
1da177e4 547 goto drop;
addd68eb 548 stats->multicast++;
1da177e4
LT
549 skb->pkt_type = PACKET_BROADCAST;
550 }
551#endif
552
553 if (((flags&GRE_CSUM) && csum) ||
554 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
555 stats->rx_crc_errors++;
556 stats->rx_errors++;
1da177e4
LT
557 goto drop;
558 }
559 if (tunnel->parms.i_flags&GRE_SEQ) {
560 if (!(flags&GRE_SEQ) ||
561 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
562 stats->rx_fifo_errors++;
563 stats->rx_errors++;
1da177e4
LT
564 goto drop;
565 }
566 tunnel->i_seqno = seqno + 1;
567 }
e1a80002 568
64194c31
HX
569 len = skb->len;
570
e1a80002
HX
571 /* Warning: All skb pointers will be invalidated! */
572 if (tunnel->dev->type == ARPHRD_ETHER) {
573 if (!pskb_may_pull(skb, ETH_HLEN)) {
574 stats->rx_length_errors++;
575 stats->rx_errors++;
576 goto drop;
577 }
578
579 iph = ip_hdr(skb);
580 skb->protocol = eth_type_trans(skb, tunnel->dev);
581 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
582 }
583
addd68eb 584 stats->rx_packets++;
64194c31 585 stats->rx_bytes += len;
1da177e4
LT
586 skb->dev = tunnel->dev;
587 dst_release(skb->dst);
588 skb->dst = NULL;
589 nf_reset(skb);
e1a80002
HX
590
591 skb_reset_network_header(skb);
1da177e4 592 ipgre_ecn_decapsulate(iph, skb);
e1a80002 593
1da177e4
LT
594 netif_rx(skb);
595 read_unlock(&ipgre_lock);
596 return(0);
597 }
45af08be 598 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
599
600drop:
601 read_unlock(&ipgre_lock);
602drop_nolock:
603 kfree_skb(skb);
604 return(0);
605}
606
607static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
608{
2941a486 609 struct ip_tunnel *tunnel = netdev_priv(dev);
addd68eb 610 struct net_device_stats *stats = &tunnel->dev->stats;
eddc9ec5 611 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
612 struct iphdr *tiph;
613 u8 tos;
d5a0a1e3 614 __be16 df;
1da177e4
LT
615 struct rtable *rt; /* Route to the other host */
616 struct net_device *tdev; /* Device to other host */
617 struct iphdr *iph; /* Our new IP header */
c2636b4d 618 unsigned int max_headroom; /* The extra header space needed */
1da177e4 619 int gre_hlen;
d5a0a1e3 620 __be32 dst;
1da177e4
LT
621 int mtu;
622
623 if (tunnel->recursion++) {
addd68eb 624 stats->collisions++;
1da177e4
LT
625 goto tx_error;
626 }
627
e1a80002
HX
628 if (dev->type == ARPHRD_ETHER)
629 IPCB(skb)->flags = 0;
630
631 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
1da177e4 632 gre_hlen = 0;
6ed2533e 633 tiph = (struct iphdr *)skb->data;
1da177e4
LT
634 } else {
635 gre_hlen = tunnel->hlen;
636 tiph = &tunnel->parms.iph;
637 }
638
639 if ((dst = tiph->daddr) == 0) {
640 /* NBMA tunnel */
641
642 if (skb->dst == NULL) {
addd68eb 643 stats->tx_fifo_errors++;
1da177e4
LT
644 goto tx_error;
645 }
646
647 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 648 rt = skb->rtable;
1da177e4
LT
649 if ((dst = rt->rt_gateway) == 0)
650 goto tx_error_icmp;
651 }
652#ifdef CONFIG_IPV6
653 else if (skb->protocol == htons(ETH_P_IPV6)) {
654 struct in6_addr *addr6;
655 int addr_type;
656 struct neighbour *neigh = skb->dst->neighbour;
657
658 if (neigh == NULL)
659 goto tx_error;
660
6ed2533e 661 addr6 = (struct in6_addr *)&neigh->primary_key;
1da177e4
LT
662 addr_type = ipv6_addr_type(addr6);
663
664 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 665 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
666 addr_type = ipv6_addr_type(addr6);
667 }
668
669 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
670 goto tx_error_icmp;
671
672 dst = addr6->s6_addr32[3];
673 }
674#endif
675 else
676 goto tx_error;
677 }
678
679 tos = tiph->tos;
680 if (tos&1) {
681 if (skb->protocol == htons(ETH_P_IP))
682 tos = old_iph->tos;
683 tos &= ~1;
684 }
685
686 {
687 struct flowi fl = { .oif = tunnel->parms.link,
688 .nl_u = { .ip4_u =
689 { .daddr = dst,
690 .saddr = tiph->saddr,
691 .tos = RT_TOS(tos) } },
692 .proto = IPPROTO_GRE };
96635522 693 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 694 stats->tx_carrier_errors++;
1da177e4
LT
695 goto tx_error;
696 }
697 }
698 tdev = rt->u.dst.dev;
699
700 if (tdev == dev) {
701 ip_rt_put(rt);
addd68eb 702 stats->collisions++;
1da177e4
LT
703 goto tx_error;
704 }
705
706 df = tiph->frag_off;
707 if (df)
c95b819a 708 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4
LT
709 else
710 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
711
712 if (skb->dst)
713 skb->dst->ops->update_pmtu(skb->dst, mtu);
714
715 if (skb->protocol == htons(ETH_P_IP)) {
716 df |= (old_iph->frag_off&htons(IP_DF));
717
718 if ((old_iph->frag_off&htons(IP_DF)) &&
719 mtu < ntohs(old_iph->tot_len)) {
720 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
721 ip_rt_put(rt);
722 goto tx_error;
723 }
724 }
725#ifdef CONFIG_IPV6
726 else if (skb->protocol == htons(ETH_P_IPV6)) {
6ed2533e 727 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
1da177e4
LT
728
729 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
730 if ((tunnel->parms.iph.daddr &&
731 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
732 rt6->rt6i_dst.plen == 128) {
733 rt6->rt6i_flags |= RTF_MODIFIED;
734 skb->dst->metrics[RTAX_MTU-1] = mtu;
735 }
736 }
737
738 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
739 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
740 ip_rt_put(rt);
741 goto tx_error;
742 }
743 }
744#endif
745
746 if (tunnel->err_count > 0) {
747 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
748 tunnel->err_count--;
749
750 dst_link_failure(skb);
751 } else
752 tunnel->err_count = 0;
753 }
754
755 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
756
cfbba49d
PM
757 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
758 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
759 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
760 if (!new_skb) {
761 ip_rt_put(rt);
e905a9ed 762 stats->tx_dropped++;
1da177e4
LT
763 dev_kfree_skb(skb);
764 tunnel->recursion--;
765 return 0;
766 }
767 if (skb->sk)
768 skb_set_owner_w(new_skb, skb->sk);
769 dev_kfree_skb(skb);
770 skb = new_skb;
eddc9ec5 771 old_iph = ip_hdr(skb);
1da177e4
LT
772 }
773
64194c31 774 skb_reset_transport_header(skb);
e2d1bca7
ACM
775 skb_push(skb, gre_hlen);
776 skb_reset_network_header(skb);
1da177e4 777 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
778 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
779 IPSKB_REROUTED);
1da177e4
LT
780 dst_release(skb->dst);
781 skb->dst = &rt->u.dst;
782
783 /*
784 * Push down and install the IPIP header.
785 */
786
eddc9ec5 787 iph = ip_hdr(skb);
1da177e4
LT
788 iph->version = 4;
789 iph->ihl = sizeof(struct iphdr) >> 2;
790 iph->frag_off = df;
791 iph->protocol = IPPROTO_GRE;
792 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
793 iph->daddr = rt->rt_dst;
794 iph->saddr = rt->rt_src;
795
796 if ((iph->ttl = tiph->ttl) == 0) {
797 if (skb->protocol == htons(ETH_P_IP))
798 iph->ttl = old_iph->ttl;
799#ifdef CONFIG_IPV6
800 else if (skb->protocol == htons(ETH_P_IPV6))
6ed2533e 801 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
1da177e4
LT
802#endif
803 else
804 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
805 }
806
e1a80002
HX
807 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
808 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
809 htons(ETH_P_TEB) : skb->protocol;
1da177e4
LT
810
811 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 812 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
813
814 if (tunnel->parms.o_flags&GRE_SEQ) {
815 ++tunnel->o_seqno;
816 *ptr = htonl(tunnel->o_seqno);
817 ptr--;
818 }
819 if (tunnel->parms.o_flags&GRE_KEY) {
820 *ptr = tunnel->parms.o_key;
821 ptr--;
822 }
823 if (tunnel->parms.o_flags&GRE_CSUM) {
824 *ptr = 0;
5f92a738 825 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
826 }
827 }
828
829 nf_reset(skb);
830
831 IPTUNNEL_XMIT();
832 tunnel->recursion--;
833 return 0;
834
835tx_error_icmp:
836 dst_link_failure(skb);
837
838tx_error:
839 stats->tx_errors++;
840 dev_kfree_skb(skb);
841 tunnel->recursion--;
842 return 0;
843}
844
42aa9162 845static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
846{
847 struct net_device *tdev = NULL;
848 struct ip_tunnel *tunnel;
849 struct iphdr *iph;
850 int hlen = LL_MAX_HEADER;
851 int mtu = ETH_DATA_LEN;
852 int addend = sizeof(struct iphdr) + 4;
853
854 tunnel = netdev_priv(dev);
855 iph = &tunnel->parms.iph;
856
c95b819a 857 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
858
859 if (iph->daddr) {
860 struct flowi fl = { .oif = tunnel->parms.link,
861 .nl_u = { .ip4_u =
862 { .daddr = iph->daddr,
863 .saddr = iph->saddr,
864 .tos = RT_TOS(iph->tos) } },
865 .proto = IPPROTO_GRE };
866 struct rtable *rt;
96635522 867 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
868 tdev = rt->u.dst.dev;
869 ip_rt_put(rt);
870 }
e1a80002
HX
871
872 if (dev->type != ARPHRD_ETHER)
873 dev->flags |= IFF_POINTOPOINT;
ee34c1eb
MS
874 }
875
876 if (!tdev && tunnel->parms.link)
96635522 877 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
878
879 if (tdev) {
c95b819a 880 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
881 mtu = tdev->mtu;
882 }
883 dev->iflink = tunnel->parms.link;
884
885 /* Precalculate GRE options length */
886 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
887 if (tunnel->parms.o_flags&GRE_CSUM)
888 addend += 4;
889 if (tunnel->parms.o_flags&GRE_KEY)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_SEQ)
892 addend += 4;
893 }
c95b819a 894 dev->needed_headroom = addend + hlen;
42aa9162
HX
895 mtu -= dev->hard_header_len - addend;
896
897 if (mtu < 68)
898 mtu = 68;
899
ee34c1eb
MS
900 tunnel->hlen = addend;
901
42aa9162 902 return mtu;
ee34c1eb
MS
903}
904
1da177e4
LT
905static int
906ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
907{
908 int err = 0;
909 struct ip_tunnel_parm p;
910 struct ip_tunnel *t;
f57e7d5a
PE
911 struct net *net = dev_net(dev);
912 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
913
914 switch (cmd) {
915 case SIOCGETTUNNEL:
916 t = NULL;
7daa0004 917 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
918 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
919 err = -EFAULT;
920 break;
921 }
f57e7d5a 922 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
923 }
924 if (t == NULL)
2941a486 925 t = netdev_priv(dev);
1da177e4
LT
926 memcpy(&p, &t->parms, sizeof(p));
927 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
928 err = -EFAULT;
929 break;
930
931 case SIOCADDTUNNEL:
932 case SIOCCHGTUNNEL:
933 err = -EPERM;
934 if (!capable(CAP_NET_ADMIN))
935 goto done;
936
937 err = -EFAULT;
938 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
939 goto done;
940
941 err = -EINVAL;
942 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
943 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
944 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
945 goto done;
946 if (p.iph.ttl)
947 p.iph.frag_off |= htons(IP_DF);
948
949 if (!(p.i_flags&GRE_KEY))
950 p.i_key = 0;
951 if (!(p.o_flags&GRE_KEY))
952 p.o_key = 0;
953
f57e7d5a 954 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 955
7daa0004 956 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
957 if (t != NULL) {
958 if (t->dev != dev) {
959 err = -EEXIST;
960 break;
961 }
962 } else {
6ed2533e 963 unsigned nflags = 0;
1da177e4 964
2941a486 965 t = netdev_priv(dev);
1da177e4 966
f97c1e0c 967 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
968 nflags = IFF_BROADCAST;
969 else if (p.iph.daddr)
970 nflags = IFF_POINTOPOINT;
971
972 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
973 err = -EINVAL;
974 break;
975 }
f57e7d5a 976 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
977 t->parms.iph.saddr = p.iph.saddr;
978 t->parms.iph.daddr = p.iph.daddr;
979 t->parms.i_key = p.i_key;
980 t->parms.o_key = p.o_key;
981 memcpy(dev->dev_addr, &p.iph.saddr, 4);
982 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 983 ipgre_tunnel_link(ign, t);
1da177e4
LT
984 netdev_state_change(dev);
985 }
986 }
987
988 if (t) {
989 err = 0;
990 if (cmd == SIOCCHGTUNNEL) {
991 t->parms.iph.ttl = p.iph.ttl;
992 t->parms.iph.tos = p.iph.tos;
993 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
994 if (t->parms.link != p.link) {
995 t->parms.link = p.link;
42aa9162 996 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
997 netdev_state_change(dev);
998 }
1da177e4
LT
999 }
1000 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1001 err = -EFAULT;
1002 } else
1003 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1004 break;
1005
1006 case SIOCDELTUNNEL:
1007 err = -EPERM;
1008 if (!capable(CAP_NET_ADMIN))
1009 goto done;
1010
7daa0004 1011 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
1012 err = -EFAULT;
1013 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1014 goto done;
1015 err = -ENOENT;
f57e7d5a 1016 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
1017 goto done;
1018 err = -EPERM;
7daa0004 1019 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
1020 goto done;
1021 dev = t->dev;
1022 }
22f8cde5
SH
1023 unregister_netdevice(dev);
1024 err = 0;
1da177e4
LT
1025 break;
1026
1027 default:
1028 err = -EINVAL;
1029 }
1030
1031done:
1032 return err;
1033}
1034
1da177e4
LT
1035static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1036{
2941a486 1037 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
1038 if (new_mtu < 68 ||
1039 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
1040 return -EINVAL;
1041 dev->mtu = new_mtu;
1042 return 0;
1043}
1044
1da177e4
LT
1045/* Nice toy. Unfortunately, useless in real life :-)
1046 It allows to construct virtual multiprotocol broadcast "LAN"
1047 over the Internet, provided multicast routing is tuned.
1048
1049
1050 I have no idea was this bicycle invented before me,
1051 so that I had to set ARPHRD_IPGRE to a random value.
1052 I have an impression, that Cisco could make something similar,
1053 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 1054
1da177e4
LT
1055 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1056 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1057
1058 ping -t 255 224.66.66.66
1059
1060 If nobody answers, mbone does not work.
1061
1062 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1063 ip addr add 10.66.66.<somewhat>/24 dev Universe
1064 ifconfig Universe up
1065 ifconfig Universe add fe80::<Your_real_addr>/10
1066 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1067 ftp 10.66.66.66
1068 ...
1069 ftp fec0:6666:6666::193.233.7.65
1070 ...
1071
1072 */
1073
3b04ddde
SH
1074static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1075 unsigned short type,
1076 const void *daddr, const void *saddr, unsigned len)
1da177e4 1077{
2941a486 1078 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1079 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1080 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1081
1082 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1083 p[0] = t->parms.o_flags;
1084 p[1] = htons(type);
1085
1086 /*
e905a9ed 1087 * Set the source hardware address.
1da177e4 1088 */
e905a9ed 1089
1da177e4
LT
1090 if (saddr)
1091 memcpy(&iph->saddr, saddr, 4);
1092
1093 if (daddr) {
1094 memcpy(&iph->daddr, daddr, 4);
1095 return t->hlen;
1096 }
f97c1e0c 1097 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1098 return t->hlen;
e905a9ed 1099
1da177e4
LT
1100 return -t->hlen;
1101}
1102
6a5f44d7
TT
1103static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1104{
6ed2533e 1105 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
1106 memcpy(haddr, &iph->saddr, 4);
1107 return 4;
1108}
1109
3b04ddde
SH
1110static const struct header_ops ipgre_header_ops = {
1111 .create = ipgre_header,
6a5f44d7 1112 .parse = ipgre_header_parse,
3b04ddde
SH
1113};
1114
6a5f44d7 1115#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1116static int ipgre_open(struct net_device *dev)
1117{
2941a486 1118 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1119
f97c1e0c 1120 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1121 struct flowi fl = { .oif = t->parms.link,
1122 .nl_u = { .ip4_u =
1123 { .daddr = t->parms.iph.daddr,
1124 .saddr = t->parms.iph.saddr,
1125 .tos = RT_TOS(t->parms.iph.tos) } },
1126 .proto = IPPROTO_GRE };
1127 struct rtable *rt;
96635522 1128 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1129 return -EADDRNOTAVAIL;
1130 dev = rt->u.dst.dev;
1131 ip_rt_put(rt);
e5ed6399 1132 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1133 return -EADDRNOTAVAIL;
1134 t->mlink = dev->ifindex;
e5ed6399 1135 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1136 }
1137 return 0;
1138}
1139
1140static int ipgre_close(struct net_device *dev)
1141{
2941a486 1142 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 1143
f97c1e0c 1144 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1145 struct in_device *in_dev;
c346dca1 1146 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1147 if (in_dev) {
1148 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1149 in_dev_put(in_dev);
1150 }
1151 }
1152 return 0;
1153}
1154
1155#endif
1156
b8c26a33
SH
1157static const struct net_device_ops ipgre_netdev_ops = {
1158 .ndo_init = ipgre_tunnel_init,
1159 .ndo_uninit = ipgre_tunnel_uninit,
1160#ifdef CONFIG_NET_IPGRE_BROADCAST
1161 .ndo_open = ipgre_open,
1162 .ndo_stop = ipgre_close,
1163#endif
1164 .ndo_start_xmit = ipgre_tunnel_xmit,
1165 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1166 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1167};
1168
1da177e4
LT
1169static void ipgre_tunnel_setup(struct net_device *dev)
1170{
b8c26a33 1171 dev->netdev_ops = &ipgre_netdev_ops;
1da177e4 1172 dev->destructor = free_netdev;
1da177e4
LT
1173
1174 dev->type = ARPHRD_IPGRE;
c95b819a 1175 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1176 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1177 dev->flags = IFF_NOARP;
1178 dev->iflink = 0;
1179 dev->addr_len = 4;
0b67eceb 1180 dev->features |= NETIF_F_NETNS_LOCAL;
1da177e4
LT
1181}
1182
1183static int ipgre_tunnel_init(struct net_device *dev)
1184{
1da177e4
LT
1185 struct ip_tunnel *tunnel;
1186 struct iphdr *iph;
1da177e4 1187
2941a486 1188 tunnel = netdev_priv(dev);
1da177e4
LT
1189 iph = &tunnel->parms.iph;
1190
1191 tunnel->dev = dev;
1192 strcpy(tunnel->parms.name, dev->name);
1193
1194 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1195 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1196
1da177e4 1197 if (iph->daddr) {
1da177e4 1198#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1199 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1200 if (!iph->saddr)
1201 return -EINVAL;
1202 dev->flags = IFF_BROADCAST;
3b04ddde 1203 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1204 }
1205#endif
ee34c1eb 1206 } else
6a5f44d7 1207 dev->header_ops = &ipgre_header_ops;
1da177e4 1208
1da177e4
LT
1209 return 0;
1210}
1211
b8c26a33 1212static void ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1213{
2941a486 1214 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1215 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1216 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1217
1218 tunnel->dev = dev;
1219 strcpy(tunnel->parms.name, dev->name);
1220
1221 iph->version = 4;
1222 iph->protocol = IPPROTO_GRE;
1223 iph->ihl = 5;
1224 tunnel->hlen = sizeof(struct iphdr) + 4;
1225
1226 dev_hold(dev);
eb8ce741 1227 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1228}
1229
1230
1231static struct net_protocol ipgre_protocol = {
1232 .handler = ipgre_rcv,
1233 .err_handler = ipgre_err,
f96c148f 1234 .netns_ok = 1,
1da177e4
LT
1235};
1236
eb8ce741
PE
1237static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1238{
1239 int prio;
1240
1241 for (prio = 0; prio < 4; prio++) {
1242 int h;
1243 for (h = 0; h < HASH_SIZE; h++) {
1244 struct ip_tunnel *t;
1245 while ((t = ign->tunnels[prio][h]) != NULL)
1246 unregister_netdevice(t->dev);
1247 }
1248 }
1249}
1250
59a4c759
PE
1251static int ipgre_init_net(struct net *net)
1252{
1253 int err;
1254 struct ipgre_net *ign;
1255
1256 err = -ENOMEM;
eb8ce741 1257 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1258 if (ign == NULL)
1259 goto err_alloc;
1260
1261 err = net_assign_generic(net, ipgre_net_id, ign);
1262 if (err < 0)
1263 goto err_assign;
1264
7daa0004
PE
1265 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1266 ipgre_tunnel_setup);
1267 if (!ign->fb_tunnel_dev) {
1268 err = -ENOMEM;
1269 goto err_alloc_dev;
1270 }
be77e593 1271 dev_net_set(ign->fb_tunnel_dev, net);
7daa0004 1272
b8c26a33 1273 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
c19e654d 1274 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1275
1276 if ((err = register_netdev(ign->fb_tunnel_dev)))
1277 goto err_reg_dev;
1278
59a4c759
PE
1279 return 0;
1280
7daa0004
PE
1281err_reg_dev:
1282 free_netdev(ign->fb_tunnel_dev);
1283err_alloc_dev:
1284 /* nothing */
59a4c759
PE
1285err_assign:
1286 kfree(ign);
1287err_alloc:
1288 return err;
1289}
1290
1291static void ipgre_exit_net(struct net *net)
1292{
1293 struct ipgre_net *ign;
1294
1295 ign = net_generic(net, ipgre_net_id);
7daa0004 1296 rtnl_lock();
eb8ce741 1297 ipgre_destroy_tunnels(ign);
7daa0004 1298 rtnl_unlock();
59a4c759
PE
1299 kfree(ign);
1300}
1301
1302static struct pernet_operations ipgre_net_ops = {
1303 .init = ipgre_init_net,
1304 .exit = ipgre_exit_net,
1305};
1da177e4 1306
c19e654d
HX
1307static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1308{
1309 __be16 flags;
1310
1311 if (!data)
1312 return 0;
1313
1314 flags = 0;
1315 if (data[IFLA_GRE_IFLAGS])
1316 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1317 if (data[IFLA_GRE_OFLAGS])
1318 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1319 if (flags & (GRE_VERSION|GRE_ROUTING))
1320 return -EINVAL;
1321
1322 return 0;
1323}
1324
e1a80002
HX
1325static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1326{
1327 __be32 daddr;
1328
1329 if (tb[IFLA_ADDRESS]) {
1330 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1331 return -EINVAL;
1332 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1333 return -EADDRNOTAVAIL;
1334 }
1335
1336 if (!data)
1337 goto out;
1338
1339 if (data[IFLA_GRE_REMOTE]) {
1340 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1341 if (!daddr)
1342 return -EINVAL;
1343 }
1344
1345out:
1346 return ipgre_tunnel_validate(tb, data);
1347}
1348
c19e654d
HX
1349static void ipgre_netlink_parms(struct nlattr *data[],
1350 struct ip_tunnel_parm *parms)
1351{
7bb82d92 1352 memset(parms, 0, sizeof(*parms));
c19e654d
HX
1353
1354 parms->iph.protocol = IPPROTO_GRE;
1355
1356 if (!data)
1357 return;
1358
1359 if (data[IFLA_GRE_LINK])
1360 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1361
1362 if (data[IFLA_GRE_IFLAGS])
1363 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1364
1365 if (data[IFLA_GRE_OFLAGS])
1366 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367
1368 if (data[IFLA_GRE_IKEY])
1369 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1370
1371 if (data[IFLA_GRE_OKEY])
1372 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1373
1374 if (data[IFLA_GRE_LOCAL])
4d74f8ba 1375 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
1376
1377 if (data[IFLA_GRE_REMOTE])
4d74f8ba 1378 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
1379
1380 if (data[IFLA_GRE_TTL])
1381 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1382
1383 if (data[IFLA_GRE_TOS])
1384 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1385
1386 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1387 parms->iph.frag_off = htons(IP_DF);
1388}
1389
e1a80002
HX
1390static int ipgre_tap_init(struct net_device *dev)
1391{
1392 struct ip_tunnel *tunnel;
1393
1394 tunnel = netdev_priv(dev);
1395
1396 tunnel->dev = dev;
1397 strcpy(tunnel->parms.name, dev->name);
1398
1399 ipgre_tunnel_bind_dev(dev);
1400
1401 return 0;
1402}
1403
b8c26a33
SH
1404static const struct net_device_ops ipgre_tap_netdev_ops = {
1405 .ndo_init = ipgre_tap_init,
1406 .ndo_uninit = ipgre_tunnel_uninit,
1407 .ndo_start_xmit = ipgre_tunnel_xmit,
1408 .ndo_set_mac_address = eth_mac_addr,
1409 .ndo_validate_addr = eth_validate_addr,
1410 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1411};
1412
e1a80002
HX
1413static void ipgre_tap_setup(struct net_device *dev)
1414{
1415
1416 ether_setup(dev);
1417
b8c26a33 1418 dev->netdev_ops = &ipgre_netdev_ops;
e1a80002 1419 dev->destructor = free_netdev;
e1a80002
HX
1420
1421 dev->iflink = 0;
1422 dev->features |= NETIF_F_NETNS_LOCAL;
1423}
1424
c19e654d
HX
1425static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1426 struct nlattr *data[])
1427{
1428 struct ip_tunnel *nt;
1429 struct net *net = dev_net(dev);
1430 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1431 int mtu;
1432 int err;
1433
1434 nt = netdev_priv(dev);
1435 ipgre_netlink_parms(data, &nt->parms);
1436
e1a80002 1437 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
c19e654d
HX
1438 return -EEXIST;
1439
e1a80002
HX
1440 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1441 random_ether_addr(dev->dev_addr);
1442
c19e654d
HX
1443 mtu = ipgre_tunnel_bind_dev(dev);
1444 if (!tb[IFLA_MTU])
1445 dev->mtu = mtu;
1446
1447 err = register_netdevice(dev);
1448 if (err)
1449 goto out;
1450
1451 dev_hold(dev);
1452 ipgre_tunnel_link(ign, nt);
1453
1454out:
1455 return err;
1456}
1457
1458static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1459 struct nlattr *data[])
1460{
1461 struct ip_tunnel *t, *nt;
1462 struct net *net = dev_net(dev);
1463 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1464 struct ip_tunnel_parm p;
1465 int mtu;
1466
1467 if (dev == ign->fb_tunnel_dev)
1468 return -EINVAL;
1469
1470 nt = netdev_priv(dev);
1471 ipgre_netlink_parms(data, &p);
1472
1473 t = ipgre_tunnel_locate(net, &p, 0);
1474
1475 if (t) {
1476 if (t->dev != dev)
1477 return -EEXIST;
1478 } else {
1479 unsigned nflags = 0;
1480
1481 t = nt;
1482
1483 if (ipv4_is_multicast(p.iph.daddr))
1484 nflags = IFF_BROADCAST;
1485 else if (p.iph.daddr)
1486 nflags = IFF_POINTOPOINT;
1487
1488 if ((dev->flags ^ nflags) &
1489 (IFF_POINTOPOINT | IFF_BROADCAST))
1490 return -EINVAL;
1491
1492 ipgre_tunnel_unlink(ign, t);
1493 t->parms.iph.saddr = p.iph.saddr;
1494 t->parms.iph.daddr = p.iph.daddr;
1495 t->parms.i_key = p.i_key;
1496 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1497 memcpy(dev->broadcast, &p.iph.daddr, 4);
1498 ipgre_tunnel_link(ign, t);
1499 netdev_state_change(dev);
1500 }
1501
1502 t->parms.o_key = p.o_key;
1503 t->parms.iph.ttl = p.iph.ttl;
1504 t->parms.iph.tos = p.iph.tos;
1505 t->parms.iph.frag_off = p.iph.frag_off;
1506
1507 if (t->parms.link != p.link) {
1508 t->parms.link = p.link;
1509 mtu = ipgre_tunnel_bind_dev(dev);
1510 if (!tb[IFLA_MTU])
1511 dev->mtu = mtu;
1512 netdev_state_change(dev);
1513 }
1514
1515 return 0;
1516}
1517
1518static size_t ipgre_get_size(const struct net_device *dev)
1519{
1520 return
1521 /* IFLA_GRE_LINK */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_IFLAGS */
1524 nla_total_size(2) +
1525 /* IFLA_GRE_OFLAGS */
1526 nla_total_size(2) +
1527 /* IFLA_GRE_IKEY */
1528 nla_total_size(4) +
1529 /* IFLA_GRE_OKEY */
1530 nla_total_size(4) +
1531 /* IFLA_GRE_LOCAL */
1532 nla_total_size(4) +
1533 /* IFLA_GRE_REMOTE */
1534 nla_total_size(4) +
1535 /* IFLA_GRE_TTL */
1536 nla_total_size(1) +
1537 /* IFLA_GRE_TOS */
1538 nla_total_size(1) +
1539 /* IFLA_GRE_PMTUDISC */
1540 nla_total_size(1) +
1541 0;
1542}
1543
1544static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1545{
1546 struct ip_tunnel *t = netdev_priv(dev);
1547 struct ip_tunnel_parm *p = &t->parms;
1548
1549 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1550 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1551 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
ba9e64b1
PM
1552 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1553 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
4d74f8ba
PM
1554 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1555 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
c19e654d
HX
1556 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1557 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1558 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1559
1560 return 0;
1561
1562nla_put_failure:
1563 return -EMSGSIZE;
1564}
1565
1566static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1567 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1568 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1569 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1570 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1571 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1572 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1573 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1574 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1575 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1576 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1577};
1578
1579static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1580 .kind = "gre",
1581 .maxtype = IFLA_GRE_MAX,
1582 .policy = ipgre_policy,
1583 .priv_size = sizeof(struct ip_tunnel),
1584 .setup = ipgre_tunnel_setup,
1585 .validate = ipgre_tunnel_validate,
1586 .newlink = ipgre_newlink,
1587 .changelink = ipgre_changelink,
1588 .get_size = ipgre_get_size,
1589 .fill_info = ipgre_fill_info,
1590};
1591
e1a80002
HX
1592static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1593 .kind = "gretap",
1594 .maxtype = IFLA_GRE_MAX,
1595 .policy = ipgre_policy,
1596 .priv_size = sizeof(struct ip_tunnel),
1597 .setup = ipgre_tap_setup,
1598 .validate = ipgre_tap_validate,
1599 .newlink = ipgre_newlink,
1600 .changelink = ipgre_changelink,
1601 .get_size = ipgre_get_size,
1602 .fill_info = ipgre_fill_info,
1603};
1604
1da177e4
LT
1605/*
1606 * And now the modules code and kernel interface.
1607 */
1608
1609static int __init ipgre_init(void)
1610{
1611 int err;
1612
1613 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1614
1615 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1616 printk(KERN_INFO "ipgre init: can't add protocol\n");
1617 return -EAGAIN;
1618 }
1619
59a4c759
PE
1620 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1621 if (err < 0)
c19e654d 1622 goto gen_device_failed;
7daa0004 1623
c19e654d
HX
1624 err = rtnl_link_register(&ipgre_link_ops);
1625 if (err < 0)
1626 goto rtnl_link_failed;
1627
e1a80002
HX
1628 err = rtnl_link_register(&ipgre_tap_ops);
1629 if (err < 0)
1630 goto tap_ops_failed;
1631
c19e654d 1632out:
1da177e4 1633 return err;
c19e654d 1634
e1a80002
HX
1635tap_ops_failed:
1636 rtnl_link_unregister(&ipgre_link_ops);
c19e654d
HX
1637rtnl_link_failed:
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1639gen_device_failed:
1640 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1641 goto out;
1da177e4
LT
1642}
1643
db44575f 1644static void __exit ipgre_fini(void)
1da177e4 1645{
e1a80002 1646 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d
HX
1647 rtnl_link_unregister(&ipgre_link_ops);
1648 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1649 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1650 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1da177e4
LT
1651}
1652
1653module_init(ipgre_init);
1654module_exit(ipgre_fini);
1655MODULE_LICENSE("GPL");
4d74f8ba
PM
1656MODULE_ALIAS_RTNL_LINK("gre");
1657MODULE_ALIAS_RTNL_LINK("gretap");