]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ip_gre.c
gre: Add netlink interface
[net-next-2.6.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
4fc268d2 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
1da177e4
LT
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
46f25dff 30#include <linux/if_ether.h>
1da177e4
LT
31
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
59a4c759
PE
42#include <net/net_namespace.h>
43#include <net/netns/generic.h>
c19e654d 44#include <net/rtnetlink.h>
1da177e4
LT
45
46#ifdef CONFIG_IPV6
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#endif
51
52/*
53 Problems & solutions
54 --------------------
55
56 1. The most important issue is detecting local dead loops.
57 They would cause complete host lockup in transmit, which
58 would be "resolved" by stack overflow or, if queueing is enabled,
59 with infinite looping in net_bh.
60
61 We cannot track such dead loops during route installation,
62 it is infeasible task. The most general solutions would be
63 to keep skb->encapsulation counter (sort of local ttl),
64 and silently drop packet when it expires. It is the best
65 solution, but it supposes maintaining a new variable in ALL
66 skb, even if no tunneling is used.
67
e905a9ed 68 Current solution: t->recursion lock breaks dead loops. It looks
1da177e4
LT
69 like dev->tbusy flag, but I preferred new variable, because
70 the semantics is different. One day, when hard_start_xmit
71 will be multithreaded we will have to use skb->encapsulation.
72
73
74
75 2. Networking dead loops would not kill routers, but would really
76 kill network. IP hop limit plays role of "t->recursion" in this case,
77 if we copy it from packet being encapsulated to upper header.
78 It is very good solution, but it introduces two problems:
79
80 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
81 do not work over tunnels.
82 - traceroute does not work. I planned to relay ICMP from tunnel,
83 so that this problem would be solved and traceroute output
84 would be even more informative. This idea appeared to be wrong:
85 only Linux complies to rfc1812 now (yes, guys, Linux is the only
86 true router now :-)), all routers (at least, in neighbourhood of mine)
87 return only 8 bytes of payload. It is the end.
88
89 Hence, if we want that OSPF worked or traceroute said something reasonable,
90 we should search for another solution.
91
92 One of them is to parse packet trying to detect inner encapsulation
93 made by our node. It is difficult or even impossible, especially,
94 taking into account fragmentation. To be short, it is not a solution at all.
95
96 Current solution: The solution was UNEXPECTEDLY SIMPLE.
97 We force DF flag on tunnels with preconfigured hop limit,
98 that is ALL. :-) Well, it does not remove the problem completely,
99 but exponential growth of network traffic is changed to linear
100 (branches, that exceed pmtu are pruned) and tunnel mtu
101 quickly degrades to value <68, where looping stops.
102 Yes, it is not good if there exists a router in the loop,
103 which does not force DF, even when encapsulating packets have DF set.
104 But it is not our problem! Nobody could accuse us, we made
105 all that we could make. Even if it is your gated who injected
106 fatal route to network, even if it were you who configured
107 fatal static route: you are innocent. :-)
108
109
110
111 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
112 practically identical code. It would be good to glue them
113 together, but it is not very evident, how to make them modular.
114 sit is integral part of IPv6, ipip and gre are naturally modular.
115 We could extract common parts (hash table, ioctl etc)
116 to a separate module (ip_tunnel.c).
117
118 Alexey Kuznetsov.
119 */
120
c19e654d 121static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4
LT
122static int ipgre_tunnel_init(struct net_device *dev);
123static void ipgre_tunnel_setup(struct net_device *dev);
42aa9162 124static int ipgre_tunnel_bind_dev(struct net_device *dev);
1da177e4
LT
125
126/* Fallback tunnel: no source, no destination, no key, no options */
127
128static int ipgre_fb_tunnel_init(struct net_device *dev);
129
eb8ce741
PE
/* Number of buckets in each tunnel hash table. */
#define HASH_SIZE  16

/* Identifier handed to net_generic() to fetch the per-namespace state. */
static int ipgre_net_id;

/* GRE state kept per network namespace. */
struct ipgre_net {
	/* First index: address-match priority (0..3); second: hash bucket. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Fallback device; its tunnel is returned when no other one matches. */
	struct net_device *fb_tunnel_dev;
};
138
1da177e4
LT
139/* Tunnel hash table */
140
141/*
142 4 hash tables:
143
144 3: (remote,local)
145 2: (remote,*)
146 1: (*,local)
147 0: (*,*)
148
149 We require exact key match i.e. if a key is present in packet
150 it will match only tunnel with the same key; if it is not present,
151 it will match only keyless tunnel.
152
153 All keyless packets, if not matched by configured keyless tunnels,
154 will match fallback tunnel.
155 */
156
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15).
 * The argument is parenthesized so that any expression can be passed
 * safely; the cast would otherwise bind only to its first operand.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)

/* Named views of the four per-namespace hash tables. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*) */
#define tunnels_l	tunnels[1]	/* (*,local) */
#define tunnels_wc	tunnels[0]	/* (*,*) */

/* Taken for writing while a hash chain is relinked, for reading in rx/err. */
static DEFINE_RWLOCK(ipgre_lock);
165
166/* Given src, dst and key, find appropriate for input tunnel. */
167
f57e7d5a
PE
168static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
169 __be32 remote, __be32 local, __be32 key)
1da177e4
LT
170{
171 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key);
173 struct ip_tunnel *t;
7daa0004 174 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 175
eb8ce741 176 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
1da177e4
LT
177 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
178 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
179 return t;
180 }
181 }
eb8ce741 182 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
1da177e4
LT
183 if (remote == t->parms.iph.daddr) {
184 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
185 return t;
186 }
187 }
eb8ce741 188 for (t = ign->tunnels_l[h1]; t; t = t->next) {
1da177e4 189 if (local == t->parms.iph.saddr ||
f97c1e0c
JP
190 (local == t->parms.iph.daddr &&
191 ipv4_is_multicast(local))) {
1da177e4
LT
192 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
193 return t;
194 }
195 }
eb8ce741 196 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
1da177e4
LT
197 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
198 return t;
199 }
200
7daa0004
PE
201 if (ign->fb_tunnel_dev->flags&IFF_UP)
202 return netdev_priv(ign->fb_tunnel_dev);
1da177e4
LT
203 return NULL;
204}
205
f57e7d5a
PE
206static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
207 struct ip_tunnel_parm *parms)
1da177e4 208{
5056a1ef
YH
209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
211 __be32 key = parms->i_key;
1da177e4
LT
212 unsigned h = HASH(key);
213 int prio = 0;
214
215 if (local)
216 prio |= 1;
f97c1e0c 217 if (remote && !ipv4_is_multicast(remote)) {
1da177e4
LT
218 prio |= 2;
219 h ^= HASH(remote);
220 }
221
eb8ce741 222 return &ign->tunnels[prio][h];
1da177e4
LT
223}
224
f57e7d5a
PE
225static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
226 struct ip_tunnel *t)
5056a1ef 227{
f57e7d5a 228 return __ipgre_bucket(ign, &t->parms);
5056a1ef
YH
229}
230
f57e7d5a 231static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4 232{
f57e7d5a 233 struct ip_tunnel **tp = ipgre_bucket(ign, t);
1da177e4
LT
234
235 t->next = *tp;
236 write_lock_bh(&ipgre_lock);
237 *tp = t;
238 write_unlock_bh(&ipgre_lock);
239}
240
f57e7d5a 241static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
1da177e4
LT
242{
243 struct ip_tunnel **tp;
244
f57e7d5a 245 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
1da177e4
LT
246 if (t == *tp) {
247 write_lock_bh(&ipgre_lock);
248 *tp = t->next;
249 write_unlock_bh(&ipgre_lock);
250 break;
251 }
252 }
253}
254
f57e7d5a
PE
255static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
256 struct ip_tunnel_parm *parms, int create)
1da177e4 257{
d5a0a1e3
AV
258 __be32 remote = parms->iph.daddr;
259 __be32 local = parms->iph.saddr;
260 __be32 key = parms->i_key;
1da177e4
LT
261 struct ip_tunnel *t, **tp, *nt;
262 struct net_device *dev;
1da177e4 263 char name[IFNAMSIZ];
f57e7d5a 264 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4 265
f57e7d5a 266 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
1da177e4
LT
267 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
268 if (key == t->parms.i_key)
269 return t;
270 }
271 }
272 if (!create)
273 return NULL;
274
275 if (parms->name[0])
276 strlcpy(name, parms->name, IFNAMSIZ);
34cc7ba6
PE
277 else
278 sprintf(name, "gre%%d");
1da177e4
LT
279
280 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
281 if (!dev)
282 return NULL;
283
0b67eceb
PE
284 dev_net_set(dev, net);
285
b37d428b
PE
286 if (strchr(name, '%')) {
287 if (dev_alloc_name(dev, name) < 0)
288 goto failed_free;
289 }
290
2941a486 291 nt = netdev_priv(dev);
1da177e4 292 nt->parms = *parms;
c19e654d 293 dev->rtnl_link_ops = &ipgre_link_ops;
1da177e4 294
42aa9162
HX
295 dev->mtu = ipgre_tunnel_bind_dev(dev);
296
b37d428b
PE
297 if (register_netdevice(dev) < 0)
298 goto failed_free;
1da177e4 299
1da177e4 300 dev_hold(dev);
f57e7d5a 301 ipgre_tunnel_link(ign, nt);
1da177e4
LT
302 return nt;
303
b37d428b
PE
304failed_free:
305 free_netdev(dev);
1da177e4
LT
306 return NULL;
307}
308
309static void ipgre_tunnel_uninit(struct net_device *dev)
310{
f57e7d5a
PE
311 struct net *net = dev_net(dev);
312 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
313
314 ipgre_tunnel_unlink(ign, netdev_priv(dev));
1da177e4
LT
315 dev_put(dev);
316}
317
318
319static void ipgre_err(struct sk_buff *skb, u32 info)
320{
1da177e4 321
071f92d0 322/* All the routers (except for Linux) return only
1da177e4
LT
323 8 bytes of packet payload. It means, that precise relaying of
324 ICMP in the real Internet is absolutely infeasible.
325
326 Moreover, Cisco "wise men" put GRE key to the third word
327 in GRE header. It makes impossible maintaining even soft state for keyed
328 GRE tunnels with enabled checksum. Tell them "thank you".
329
330 Well, I wonder, rfc1812 was written by Cisco employee,
331 what the hell these idiots break standrads established
332 by themself???
333 */
334
335 struct iphdr *iph = (struct iphdr*)skb->data;
d5a0a1e3 336 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
1da177e4 337 int grehlen = (iph->ihl<<2) + 4;
88c7664f
ACM
338 const int type = icmp_hdr(skb)->type;
339 const int code = icmp_hdr(skb)->code;
1da177e4 340 struct ip_tunnel *t;
d5a0a1e3 341 __be16 flags;
1da177e4
LT
342
343 flags = p[0];
344 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
345 if (flags&(GRE_VERSION|GRE_ROUTING))
346 return;
347 if (flags&GRE_KEY) {
348 grehlen += 4;
349 if (flags&GRE_CSUM)
350 grehlen += 4;
351 }
352 }
353
354 /* If only 8 bytes returned, keyed message will be dropped here */
355 if (skb_headlen(skb) < grehlen)
356 return;
357
358 switch (type) {
359 default:
360 case ICMP_PARAMETERPROB:
361 return;
362
363 case ICMP_DEST_UNREACH:
364 switch (code) {
365 case ICMP_SR_FAILED:
366 case ICMP_PORT_UNREACH:
367 /* Impossible event. */
368 return;
369 case ICMP_FRAG_NEEDED:
370 /* Soft state for pmtu is maintained by IP core. */
371 return;
372 default:
373 /* All others are translated to HOST_UNREACH.
374 rfc2003 contains "deep thoughts" about NET_UNREACH,
375 I believe they are just ether pollution. --ANK
376 */
377 break;
378 }
379 break;
380 case ICMP_TIME_EXCEEDED:
381 if (code != ICMP_EXC_TTL)
382 return;
383 break;
384 }
385
386 read_lock(&ipgre_lock);
3b4667f3 387 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
f57e7d5a
PE
388 (flags&GRE_KEY) ?
389 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
f97c1e0c
JP
390 if (t == NULL || t->parms.iph.daddr == 0 ||
391 ipv4_is_multicast(t->parms.iph.daddr))
1da177e4
LT
392 goto out;
393
394 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
395 goto out;
396
397 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
398 t->err_count++;
399 else
400 t->err_count = 1;
401 t->err_time = jiffies;
402out:
403 read_unlock(&ipgre_lock);
404 return;
1da177e4
LT
405}
406
407static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
408{
409 if (INET_ECN_is_ce(iph->tos)) {
410 if (skb->protocol == htons(ETH_P_IP)) {
eddc9ec5 411 IP_ECN_set_ce(ip_hdr(skb));
1da177e4 412 } else if (skb->protocol == htons(ETH_P_IPV6)) {
0660e03f 413 IP6_ECN_set_ce(ipv6_hdr(skb));
1da177e4
LT
414 }
415 }
416}
417
418static inline u8
419ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
420{
421 u8 inner = 0;
422 if (skb->protocol == htons(ETH_P_IP))
423 inner = old_iph->tos;
424 else if (skb->protocol == htons(ETH_P_IPV6))
425 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
426 return INET_ECN_encapsulate(tos, inner);
427}
428
429static int ipgre_rcv(struct sk_buff *skb)
430{
431 struct iphdr *iph;
432 u8 *h;
d5a0a1e3 433 __be16 flags;
d3bc23e7 434 __sum16 csum = 0;
d5a0a1e3 435 __be32 key = 0;
1da177e4
LT
436 u32 seqno = 0;
437 struct ip_tunnel *tunnel;
438 int offset = 4;
439
440 if (!pskb_may_pull(skb, 16))
441 goto drop_nolock;
442
eddc9ec5 443 iph = ip_hdr(skb);
1da177e4 444 h = skb->data;
d5a0a1e3 445 flags = *(__be16*)h;
1da177e4
LT
446
447 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
448 /* - Version must be 0.
449 - We do not support routing headers.
450 */
451 if (flags&(GRE_VERSION|GRE_ROUTING))
452 goto drop_nolock;
453
454 if (flags&GRE_CSUM) {
fb286bb2 455 switch (skb->ip_summed) {
84fa7933 456 case CHECKSUM_COMPLETE:
d3bc23e7 457 csum = csum_fold(skb->csum);
fb286bb2
HX
458 if (!csum)
459 break;
460 /* fall through */
461 case CHECKSUM_NONE:
462 skb->csum = 0;
463 csum = __skb_checksum_complete(skb);
84fa7933 464 skb->ip_summed = CHECKSUM_COMPLETE;
1da177e4
LT
465 }
466 offset += 4;
467 }
468 if (flags&GRE_KEY) {
d5a0a1e3 469 key = *(__be32*)(h + offset);
1da177e4
LT
470 offset += 4;
471 }
472 if (flags&GRE_SEQ) {
d5a0a1e3 473 seqno = ntohl(*(__be32*)(h + offset));
1da177e4
LT
474 offset += 4;
475 }
476 }
477
478 read_lock(&ipgre_lock);
3b4667f3 479 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
f57e7d5a 480 iph->saddr, iph->daddr, key)) != NULL) {
addd68eb
PE
481 struct net_device_stats *stats = &tunnel->dev->stats;
482
1da177e4
LT
483 secpath_reset(skb);
484
d5a0a1e3 485 skb->protocol = *(__be16*)(h + 2);
1da177e4
LT
486 /* WCCP version 1 and 2 protocol decoding.
487 * - Change protocol to IP
488 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
489 */
490 if (flags == 0 &&
496c98df
YH
491 skb->protocol == htons(ETH_P_WCCP)) {
492 skb->protocol = htons(ETH_P_IP);
e905a9ed 493 if ((*(h + offset) & 0xF0) != 0x40)
1da177e4
LT
494 offset += 4;
495 }
496
1d069167 497 skb->mac_header = skb->network_header;
4209fb60
ACM
498 __pskb_pull(skb, offset);
499 skb_reset_network_header(skb);
9c70220b 500 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1da177e4
LT
501 skb->pkt_type = PACKET_HOST;
502#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 503 if (ipv4_is_multicast(iph->daddr)) {
1da177e4 504 /* Looped back packet, drop it! */
ee6b9673 505 if (skb->rtable->fl.iif == 0)
1da177e4 506 goto drop;
addd68eb 507 stats->multicast++;
1da177e4
LT
508 skb->pkt_type = PACKET_BROADCAST;
509 }
510#endif
511
512 if (((flags&GRE_CSUM) && csum) ||
513 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
addd68eb
PE
514 stats->rx_crc_errors++;
515 stats->rx_errors++;
1da177e4
LT
516 goto drop;
517 }
518 if (tunnel->parms.i_flags&GRE_SEQ) {
519 if (!(flags&GRE_SEQ) ||
520 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
addd68eb
PE
521 stats->rx_fifo_errors++;
522 stats->rx_errors++;
1da177e4
LT
523 goto drop;
524 }
525 tunnel->i_seqno = seqno + 1;
526 }
addd68eb
PE
527 stats->rx_packets++;
528 stats->rx_bytes += skb->len;
1da177e4
LT
529 skb->dev = tunnel->dev;
530 dst_release(skb->dst);
531 skb->dst = NULL;
532 nf_reset(skb);
533 ipgre_ecn_decapsulate(iph, skb);
534 netif_rx(skb);
535 read_unlock(&ipgre_lock);
536 return(0);
537 }
45af08be 538 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4
LT
539
540drop:
541 read_unlock(&ipgre_lock);
542drop_nolock:
543 kfree_skb(skb);
544 return(0);
545}
546
547static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
548{
2941a486 549 struct ip_tunnel *tunnel = netdev_priv(dev);
addd68eb 550 struct net_device_stats *stats = &tunnel->dev->stats;
eddc9ec5 551 struct iphdr *old_iph = ip_hdr(skb);
1da177e4
LT
552 struct iphdr *tiph;
553 u8 tos;
d5a0a1e3 554 __be16 df;
1da177e4
LT
555 struct rtable *rt; /* Route to the other host */
556 struct net_device *tdev; /* Device to other host */
557 struct iphdr *iph; /* Our new IP header */
c2636b4d 558 unsigned int max_headroom; /* The extra header space needed */
1da177e4 559 int gre_hlen;
d5a0a1e3 560 __be32 dst;
1da177e4
LT
561 int mtu;
562
563 if (tunnel->recursion++) {
addd68eb 564 stats->collisions++;
1da177e4
LT
565 goto tx_error;
566 }
567
3b04ddde 568 if (dev->header_ops) {
1da177e4
LT
569 gre_hlen = 0;
570 tiph = (struct iphdr*)skb->data;
571 } else {
572 gre_hlen = tunnel->hlen;
573 tiph = &tunnel->parms.iph;
574 }
575
576 if ((dst = tiph->daddr) == 0) {
577 /* NBMA tunnel */
578
579 if (skb->dst == NULL) {
addd68eb 580 stats->tx_fifo_errors++;
1da177e4
LT
581 goto tx_error;
582 }
583
584 if (skb->protocol == htons(ETH_P_IP)) {
ee6b9673 585 rt = skb->rtable;
1da177e4
LT
586 if ((dst = rt->rt_gateway) == 0)
587 goto tx_error_icmp;
588 }
589#ifdef CONFIG_IPV6
590 else if (skb->protocol == htons(ETH_P_IPV6)) {
591 struct in6_addr *addr6;
592 int addr_type;
593 struct neighbour *neigh = skb->dst->neighbour;
594
595 if (neigh == NULL)
596 goto tx_error;
597
598 addr6 = (struct in6_addr*)&neigh->primary_key;
599 addr_type = ipv6_addr_type(addr6);
600
601 if (addr_type == IPV6_ADDR_ANY) {
0660e03f 602 addr6 = &ipv6_hdr(skb)->daddr;
1da177e4
LT
603 addr_type = ipv6_addr_type(addr6);
604 }
605
606 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
607 goto tx_error_icmp;
608
609 dst = addr6->s6_addr32[3];
610 }
611#endif
612 else
613 goto tx_error;
614 }
615
616 tos = tiph->tos;
617 if (tos&1) {
618 if (skb->protocol == htons(ETH_P_IP))
619 tos = old_iph->tos;
620 tos &= ~1;
621 }
622
623 {
624 struct flowi fl = { .oif = tunnel->parms.link,
625 .nl_u = { .ip4_u =
626 { .daddr = dst,
627 .saddr = tiph->saddr,
628 .tos = RT_TOS(tos) } },
629 .proto = IPPROTO_GRE };
96635522 630 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
addd68eb 631 stats->tx_carrier_errors++;
1da177e4
LT
632 goto tx_error;
633 }
634 }
635 tdev = rt->u.dst.dev;
636
637 if (tdev == dev) {
638 ip_rt_put(rt);
addd68eb 639 stats->collisions++;
1da177e4
LT
640 goto tx_error;
641 }
642
643 df = tiph->frag_off;
644 if (df)
c95b819a 645 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
1da177e4
LT
646 else
647 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
648
649 if (skb->dst)
650 skb->dst->ops->update_pmtu(skb->dst, mtu);
651
652 if (skb->protocol == htons(ETH_P_IP)) {
653 df |= (old_iph->frag_off&htons(IP_DF));
654
655 if ((old_iph->frag_off&htons(IP_DF)) &&
656 mtu < ntohs(old_iph->tot_len)) {
657 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
658 ip_rt_put(rt);
659 goto tx_error;
660 }
661 }
662#ifdef CONFIG_IPV6
663 else if (skb->protocol == htons(ETH_P_IPV6)) {
664 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
665
666 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
f97c1e0c
JP
667 if ((tunnel->parms.iph.daddr &&
668 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
1da177e4
LT
669 rt6->rt6i_dst.plen == 128) {
670 rt6->rt6i_flags |= RTF_MODIFIED;
671 skb->dst->metrics[RTAX_MTU-1] = mtu;
672 }
673 }
674
675 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
676 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
677 ip_rt_put(rt);
678 goto tx_error;
679 }
680 }
681#endif
682
683 if (tunnel->err_count > 0) {
684 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
685 tunnel->err_count--;
686
687 dst_link_failure(skb);
688 } else
689 tunnel->err_count = 0;
690 }
691
692 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
693
cfbba49d
PM
694 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
695 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1da177e4
LT
696 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
697 if (!new_skb) {
698 ip_rt_put(rt);
e905a9ed 699 stats->tx_dropped++;
1da177e4
LT
700 dev_kfree_skb(skb);
701 tunnel->recursion--;
702 return 0;
703 }
704 if (skb->sk)
705 skb_set_owner_w(new_skb, skb->sk);
706 dev_kfree_skb(skb);
707 skb = new_skb;
eddc9ec5 708 old_iph = ip_hdr(skb);
1da177e4
LT
709 }
710
b0e380b1 711 skb->transport_header = skb->network_header;
e2d1bca7
ACM
712 skb_push(skb, gre_hlen);
713 skb_reset_network_header(skb);
1da177e4 714 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
48d5cad8
PM
715 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
716 IPSKB_REROUTED);
1da177e4
LT
717 dst_release(skb->dst);
718 skb->dst = &rt->u.dst;
719
720 /*
721 * Push down and install the IPIP header.
722 */
723
eddc9ec5 724 iph = ip_hdr(skb);
1da177e4
LT
725 iph->version = 4;
726 iph->ihl = sizeof(struct iphdr) >> 2;
727 iph->frag_off = df;
728 iph->protocol = IPPROTO_GRE;
729 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
730 iph->daddr = rt->rt_dst;
731 iph->saddr = rt->rt_src;
732
733 if ((iph->ttl = tiph->ttl) == 0) {
734 if (skb->protocol == htons(ETH_P_IP))
735 iph->ttl = old_iph->ttl;
736#ifdef CONFIG_IPV6
737 else if (skb->protocol == htons(ETH_P_IPV6))
738 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
739#endif
740 else
741 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
742 }
743
d5a0a1e3
AV
744 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
745 ((__be16*)(iph+1))[1] = skb->protocol;
1da177e4
LT
746
747 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
d5a0a1e3 748 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1da177e4
LT
749
750 if (tunnel->parms.o_flags&GRE_SEQ) {
751 ++tunnel->o_seqno;
752 *ptr = htonl(tunnel->o_seqno);
753 ptr--;
754 }
755 if (tunnel->parms.o_flags&GRE_KEY) {
756 *ptr = tunnel->parms.o_key;
757 ptr--;
758 }
759 if (tunnel->parms.o_flags&GRE_CSUM) {
760 *ptr = 0;
5f92a738 761 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1da177e4
LT
762 }
763 }
764
765 nf_reset(skb);
766
767 IPTUNNEL_XMIT();
768 tunnel->recursion--;
769 return 0;
770
771tx_error_icmp:
772 dst_link_failure(skb);
773
774tx_error:
775 stats->tx_errors++;
776 dev_kfree_skb(skb);
777 tunnel->recursion--;
778 return 0;
779}
780
42aa9162 781static int ipgre_tunnel_bind_dev(struct net_device *dev)
ee34c1eb
MS
782{
783 struct net_device *tdev = NULL;
784 struct ip_tunnel *tunnel;
785 struct iphdr *iph;
786 int hlen = LL_MAX_HEADER;
787 int mtu = ETH_DATA_LEN;
788 int addend = sizeof(struct iphdr) + 4;
789
790 tunnel = netdev_priv(dev);
791 iph = &tunnel->parms.iph;
792
c95b819a 793 /* Guess output device to choose reasonable mtu and needed_headroom */
ee34c1eb
MS
794
795 if (iph->daddr) {
796 struct flowi fl = { .oif = tunnel->parms.link,
797 .nl_u = { .ip4_u =
798 { .daddr = iph->daddr,
799 .saddr = iph->saddr,
800 .tos = RT_TOS(iph->tos) } },
801 .proto = IPPROTO_GRE };
802 struct rtable *rt;
96635522 803 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
ee34c1eb
MS
804 tdev = rt->u.dst.dev;
805 ip_rt_put(rt);
806 }
807 dev->flags |= IFF_POINTOPOINT;
808 }
809
810 if (!tdev && tunnel->parms.link)
96635522 811 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
ee34c1eb
MS
812
813 if (tdev) {
c95b819a 814 hlen = tdev->hard_header_len + tdev->needed_headroom;
ee34c1eb
MS
815 mtu = tdev->mtu;
816 }
817 dev->iflink = tunnel->parms.link;
818
819 /* Precalculate GRE options length */
820 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
821 if (tunnel->parms.o_flags&GRE_CSUM)
822 addend += 4;
823 if (tunnel->parms.o_flags&GRE_KEY)
824 addend += 4;
825 if (tunnel->parms.o_flags&GRE_SEQ)
826 addend += 4;
827 }
c95b819a 828 dev->needed_headroom = addend + hlen;
42aa9162
HX
829 mtu -= dev->hard_header_len - addend;
830
831 if (mtu < 68)
832 mtu = 68;
833
ee34c1eb
MS
834 tunnel->hlen = addend;
835
42aa9162 836 return mtu;
ee34c1eb
MS
837}
838
1da177e4
LT
839static int
840ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
841{
842 int err = 0;
843 struct ip_tunnel_parm p;
844 struct ip_tunnel *t;
f57e7d5a
PE
845 struct net *net = dev_net(dev);
846 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1da177e4
LT
847
848 switch (cmd) {
849 case SIOCGETTUNNEL:
850 t = NULL;
7daa0004 851 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
852 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
853 err = -EFAULT;
854 break;
855 }
f57e7d5a 856 t = ipgre_tunnel_locate(net, &p, 0);
1da177e4
LT
857 }
858 if (t == NULL)
2941a486 859 t = netdev_priv(dev);
1da177e4
LT
860 memcpy(&p, &t->parms, sizeof(p));
861 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
862 err = -EFAULT;
863 break;
864
865 case SIOCADDTUNNEL:
866 case SIOCCHGTUNNEL:
867 err = -EPERM;
868 if (!capable(CAP_NET_ADMIN))
869 goto done;
870
871 err = -EFAULT;
872 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
873 goto done;
874
875 err = -EINVAL;
876 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
877 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
878 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
879 goto done;
880 if (p.iph.ttl)
881 p.iph.frag_off |= htons(IP_DF);
882
883 if (!(p.i_flags&GRE_KEY))
884 p.i_key = 0;
885 if (!(p.o_flags&GRE_KEY))
886 p.o_key = 0;
887
f57e7d5a 888 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1da177e4 889
7daa0004 890 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1da177e4
LT
891 if (t != NULL) {
892 if (t->dev != dev) {
893 err = -EEXIST;
894 break;
895 }
896 } else {
897 unsigned nflags=0;
898
2941a486 899 t = netdev_priv(dev);
1da177e4 900
f97c1e0c 901 if (ipv4_is_multicast(p.iph.daddr))
1da177e4
LT
902 nflags = IFF_BROADCAST;
903 else if (p.iph.daddr)
904 nflags = IFF_POINTOPOINT;
905
906 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
907 err = -EINVAL;
908 break;
909 }
f57e7d5a 910 ipgre_tunnel_unlink(ign, t);
1da177e4
LT
911 t->parms.iph.saddr = p.iph.saddr;
912 t->parms.iph.daddr = p.iph.daddr;
913 t->parms.i_key = p.i_key;
914 t->parms.o_key = p.o_key;
915 memcpy(dev->dev_addr, &p.iph.saddr, 4);
916 memcpy(dev->broadcast, &p.iph.daddr, 4);
f57e7d5a 917 ipgre_tunnel_link(ign, t);
1da177e4
LT
918 netdev_state_change(dev);
919 }
920 }
921
922 if (t) {
923 err = 0;
924 if (cmd == SIOCCHGTUNNEL) {
925 t->parms.iph.ttl = p.iph.ttl;
926 t->parms.iph.tos = p.iph.tos;
927 t->parms.iph.frag_off = p.iph.frag_off;
ee34c1eb
MS
928 if (t->parms.link != p.link) {
929 t->parms.link = p.link;
42aa9162 930 dev->mtu = ipgre_tunnel_bind_dev(dev);
ee34c1eb
MS
931 netdev_state_change(dev);
932 }
1da177e4
LT
933 }
934 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
935 err = -EFAULT;
936 } else
937 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
938 break;
939
940 case SIOCDELTUNNEL:
941 err = -EPERM;
942 if (!capable(CAP_NET_ADMIN))
943 goto done;
944
7daa0004 945 if (dev == ign->fb_tunnel_dev) {
1da177e4
LT
946 err = -EFAULT;
947 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
948 goto done;
949 err = -ENOENT;
f57e7d5a 950 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1da177e4
LT
951 goto done;
952 err = -EPERM;
7daa0004 953 if (t == netdev_priv(ign->fb_tunnel_dev))
1da177e4
LT
954 goto done;
955 dev = t->dev;
956 }
22f8cde5
SH
957 unregister_netdevice(dev);
958 err = 0;
1da177e4
LT
959 break;
960
961 default:
962 err = -EINVAL;
963 }
964
965done:
966 return err;
967}
968
1da177e4
LT
969static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
970{
2941a486 971 struct ip_tunnel *tunnel = netdev_priv(dev);
c95b819a
HX
972 if (new_mtu < 68 ||
973 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1da177e4
LT
974 return -EINVAL;
975 dev->mtu = new_mtu;
976 return 0;
977}
978
1da177e4
LT
979/* Nice toy. Unfortunately, useless in real life :-)
980 It allows to construct virtual multiprotocol broadcast "LAN"
981 over the Internet, provided multicast routing is tuned.
982
983
984 I have no idea was this bicycle invented before me,
985 so that I had to set ARPHRD_IPGRE to a random value.
986 I have an impression, that Cisco could make something similar,
987 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 988
1da177e4
LT
989 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
990 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
991
992 ping -t 255 224.66.66.66
993
994 If nobody answers, mbone does not work.
995
996 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
997 ip addr add 10.66.66.<somewhat>/24 dev Universe
998 ifconfig Universe up
999 ifconfig Universe add fe80::<Your_real_addr>/10
1000 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1001 ftp 10.66.66.66
1002 ...
1003 ftp fec0:6666:6666::193.233.7.65
1004 ...
1005
1006 */
1007
3b04ddde
SH
1008static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1009 unsigned short type,
1010 const void *daddr, const void *saddr, unsigned len)
1da177e4 1011{
2941a486 1012 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1013 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
d5a0a1e3 1014 __be16 *p = (__be16*)(iph+1);
1da177e4
LT
1015
1016 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1017 p[0] = t->parms.o_flags;
1018 p[1] = htons(type);
1019
1020 /*
e905a9ed 1021 * Set the source hardware address.
1da177e4 1022 */
e905a9ed 1023
1da177e4
LT
1024 if (saddr)
1025 memcpy(&iph->saddr, saddr, 4);
1026
1027 if (daddr) {
1028 memcpy(&iph->daddr, daddr, 4);
1029 return t->hlen;
1030 }
f97c1e0c 1031 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1da177e4 1032 return t->hlen;
e905a9ed 1033
1da177e4
LT
1034 return -t->hlen;
1035}
1036
6a5f44d7
TT
1037static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1038{
1039 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1040 memcpy(haddr, &iph->saddr, 4);
1041 return 4;
1042}
1043
3b04ddde
SH
1044static const struct header_ops ipgre_header_ops = {
1045 .create = ipgre_header,
6a5f44d7 1046 .parse = ipgre_header_parse,
3b04ddde
SH
1047};
1048
6a5f44d7 1049#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
1050static int ipgre_open(struct net_device *dev)
1051{
2941a486 1052 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 1053
f97c1e0c 1054 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1da177e4
LT
1055 struct flowi fl = { .oif = t->parms.link,
1056 .nl_u = { .ip4_u =
1057 { .daddr = t->parms.iph.daddr,
1058 .saddr = t->parms.iph.saddr,
1059 .tos = RT_TOS(t->parms.iph.tos) } },
1060 .proto = IPPROTO_GRE };
1061 struct rtable *rt;
96635522 1062 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1da177e4
LT
1063 return -EADDRNOTAVAIL;
1064 dev = rt->u.dst.dev;
1065 ip_rt_put(rt);
e5ed6399 1066 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
1067 return -EADDRNOTAVAIL;
1068 t->mlink = dev->ifindex;
e5ed6399 1069 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
1070 }
1071 return 0;
1072}
1073
1074static int ipgre_close(struct net_device *dev)
1075{
2941a486 1076 struct ip_tunnel *t = netdev_priv(dev);
f97c1e0c 1077 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 1078 struct in_device *in_dev;
c346dca1 1079 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1da177e4
LT
1080 if (in_dev) {
1081 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1082 in_dev_put(in_dev);
1083 }
1084 }
1085 return 0;
1086}
1087
1088#endif
1089
1090static void ipgre_tunnel_setup(struct net_device *dev)
1091{
c19e654d 1092 dev->init = ipgre_tunnel_init;
1da177e4
LT
1093 dev->uninit = ipgre_tunnel_uninit;
1094 dev->destructor = free_netdev;
1095 dev->hard_start_xmit = ipgre_tunnel_xmit;
1da177e4
LT
1096 dev->do_ioctl = ipgre_tunnel_ioctl;
1097 dev->change_mtu = ipgre_tunnel_change_mtu;
1098
1099 dev->type = ARPHRD_IPGRE;
c95b819a 1100 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 1101 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1da177e4
LT
1102 dev->flags = IFF_NOARP;
1103 dev->iflink = 0;
1104 dev->addr_len = 4;
0b67eceb 1105 dev->features |= NETIF_F_NETNS_LOCAL;
1da177e4
LT
1106}
1107
1108static int ipgre_tunnel_init(struct net_device *dev)
1109{
1da177e4
LT
1110 struct ip_tunnel *tunnel;
1111 struct iphdr *iph;
1da177e4 1112
2941a486 1113 tunnel = netdev_priv(dev);
1da177e4
LT
1114 iph = &tunnel->parms.iph;
1115
1116 tunnel->dev = dev;
1117 strcpy(tunnel->parms.name, dev->name);
1118
1119 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1120 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1121
1da177e4 1122 if (iph->daddr) {
1da177e4 1123#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 1124 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
1125 if (!iph->saddr)
1126 return -EINVAL;
1127 dev->flags = IFF_BROADCAST;
3b04ddde 1128 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
1129 dev->open = ipgre_open;
1130 dev->stop = ipgre_close;
1131 }
1132#endif
ee34c1eb 1133 } else
6a5f44d7 1134 dev->header_ops = &ipgre_header_ops;
1da177e4 1135
1da177e4
LT
1136 return 0;
1137}
1138
7daa0004 1139static int ipgre_fb_tunnel_init(struct net_device *dev)
1da177e4 1140{
2941a486 1141 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 1142 struct iphdr *iph = &tunnel->parms.iph;
eb8ce741 1143 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1da177e4
LT
1144
1145 tunnel->dev = dev;
1146 strcpy(tunnel->parms.name, dev->name);
1147
1148 iph->version = 4;
1149 iph->protocol = IPPROTO_GRE;
1150 iph->ihl = 5;
1151 tunnel->hlen = sizeof(struct iphdr) + 4;
1152
1153 dev_hold(dev);
eb8ce741 1154 ign->tunnels_wc[0] = tunnel;
1da177e4
LT
1155 return 0;
1156}
1157
1158
1159static struct net_protocol ipgre_protocol = {
1160 .handler = ipgre_rcv,
1161 .err_handler = ipgre_err,
f96c148f 1162 .netns_ok = 1,
1da177e4
LT
1163};
1164
eb8ce741
PE
1165static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1166{
1167 int prio;
1168
1169 for (prio = 0; prio < 4; prio++) {
1170 int h;
1171 for (h = 0; h < HASH_SIZE; h++) {
1172 struct ip_tunnel *t;
1173 while ((t = ign->tunnels[prio][h]) != NULL)
1174 unregister_netdevice(t->dev);
1175 }
1176 }
1177}
1178
59a4c759
PE
1179static int ipgre_init_net(struct net *net)
1180{
1181 int err;
1182 struct ipgre_net *ign;
1183
1184 err = -ENOMEM;
eb8ce741 1185 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
59a4c759
PE
1186 if (ign == NULL)
1187 goto err_alloc;
1188
1189 err = net_assign_generic(net, ipgre_net_id, ign);
1190 if (err < 0)
1191 goto err_assign;
1192
7daa0004
PE
1193 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1194 ipgre_tunnel_setup);
1195 if (!ign->fb_tunnel_dev) {
1196 err = -ENOMEM;
1197 goto err_alloc_dev;
1198 }
1199
1200 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1201 dev_net_set(ign->fb_tunnel_dev, net);
c19e654d 1202 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
7daa0004
PE
1203
1204 if ((err = register_netdev(ign->fb_tunnel_dev)))
1205 goto err_reg_dev;
1206
59a4c759
PE
1207 return 0;
1208
7daa0004
PE
1209err_reg_dev:
1210 free_netdev(ign->fb_tunnel_dev);
1211err_alloc_dev:
1212 /* nothing */
59a4c759
PE
1213err_assign:
1214 kfree(ign);
1215err_alloc:
1216 return err;
1217}
1218
1219static void ipgre_exit_net(struct net *net)
1220{
1221 struct ipgre_net *ign;
1222
1223 ign = net_generic(net, ipgre_net_id);
7daa0004 1224 rtnl_lock();
eb8ce741 1225 ipgre_destroy_tunnels(ign);
7daa0004 1226 rtnl_unlock();
59a4c759
PE
1227 kfree(ign);
1228}
1229
1230static struct pernet_operations ipgre_net_ops = {
1231 .init = ipgre_init_net,
1232 .exit = ipgre_exit_net,
1233};
1da177e4 1234
c19e654d
HX
1235static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1236{
1237 __be16 flags;
1238
1239 if (!data)
1240 return 0;
1241
1242 flags = 0;
1243 if (data[IFLA_GRE_IFLAGS])
1244 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1245 if (data[IFLA_GRE_OFLAGS])
1246 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1247 if (flags & (GRE_VERSION|GRE_ROUTING))
1248 return -EINVAL;
1249
1250 return 0;
1251}
1252
1253static void ipgre_netlink_parms(struct nlattr *data[],
1254 struct ip_tunnel_parm *parms)
1255{
1256 memset(parms, 0, sizeof(parms));
1257
1258 parms->iph.protocol = IPPROTO_GRE;
1259
1260 if (!data)
1261 return;
1262
1263 if (data[IFLA_GRE_LINK])
1264 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1265
1266 if (data[IFLA_GRE_IFLAGS])
1267 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1268
1269 if (data[IFLA_GRE_OFLAGS])
1270 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1271
1272 if (data[IFLA_GRE_IKEY])
1273 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1274
1275 if (data[IFLA_GRE_OKEY])
1276 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1277
1278 if (data[IFLA_GRE_LOCAL])
1279 memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
1280
1281 if (data[IFLA_GRE_REMOTE])
1282 memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1283
1284 if (data[IFLA_GRE_TTL])
1285 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1286
1287 if (data[IFLA_GRE_TOS])
1288 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1289
1290 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1291 parms->iph.frag_off = htons(IP_DF);
1292}
1293
1294static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1295 struct nlattr *data[])
1296{
1297 struct ip_tunnel *nt;
1298 struct net *net = dev_net(dev);
1299 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1300 int mtu;
1301 int err;
1302
1303 nt = netdev_priv(dev);
1304 ipgre_netlink_parms(data, &nt->parms);
1305
1306 if (ipgre_tunnel_locate(net, &nt->parms, 0))
1307 return -EEXIST;
1308
1309 mtu = ipgre_tunnel_bind_dev(dev);
1310 if (!tb[IFLA_MTU])
1311 dev->mtu = mtu;
1312
1313 err = register_netdevice(dev);
1314 if (err)
1315 goto out;
1316
1317 dev_hold(dev);
1318 ipgre_tunnel_link(ign, nt);
1319
1320out:
1321 return err;
1322}
1323
1324static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1325 struct nlattr *data[])
1326{
1327 struct ip_tunnel *t, *nt;
1328 struct net *net = dev_net(dev);
1329 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1330 struct ip_tunnel_parm p;
1331 int mtu;
1332
1333 if (dev == ign->fb_tunnel_dev)
1334 return -EINVAL;
1335
1336 nt = netdev_priv(dev);
1337 ipgre_netlink_parms(data, &p);
1338
1339 t = ipgre_tunnel_locate(net, &p, 0);
1340
1341 if (t) {
1342 if (t->dev != dev)
1343 return -EEXIST;
1344 } else {
1345 unsigned nflags = 0;
1346
1347 t = nt;
1348
1349 if (ipv4_is_multicast(p.iph.daddr))
1350 nflags = IFF_BROADCAST;
1351 else if (p.iph.daddr)
1352 nflags = IFF_POINTOPOINT;
1353
1354 if ((dev->flags ^ nflags) &
1355 (IFF_POINTOPOINT | IFF_BROADCAST))
1356 return -EINVAL;
1357
1358 ipgre_tunnel_unlink(ign, t);
1359 t->parms.iph.saddr = p.iph.saddr;
1360 t->parms.iph.daddr = p.iph.daddr;
1361 t->parms.i_key = p.i_key;
1362 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1363 memcpy(dev->broadcast, &p.iph.daddr, 4);
1364 ipgre_tunnel_link(ign, t);
1365 netdev_state_change(dev);
1366 }
1367
1368 t->parms.o_key = p.o_key;
1369 t->parms.iph.ttl = p.iph.ttl;
1370 t->parms.iph.tos = p.iph.tos;
1371 t->parms.iph.frag_off = p.iph.frag_off;
1372
1373 if (t->parms.link != p.link) {
1374 t->parms.link = p.link;
1375 mtu = ipgre_tunnel_bind_dev(dev);
1376 if (!tb[IFLA_MTU])
1377 dev->mtu = mtu;
1378 netdev_state_change(dev);
1379 }
1380
1381 return 0;
1382}
1383
/*
 *	rtnl_link_ops->get_size hook: the sum of nla_total_size() over the
 *	payload sizes of the ten IFLA_GRE_* attributes listed below.
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	size_t total = 0;

	total += nla_total_size(4);	/* IFLA_GRE_LINK */
	total += nla_total_size(2);	/* IFLA_GRE_IFLAGS */
	total += nla_total_size(2);	/* IFLA_GRE_OFLAGS */
	total += nla_total_size(4);	/* IFLA_GRE_IKEY */
	total += nla_total_size(4);	/* IFLA_GRE_OKEY */
	total += nla_total_size(4);	/* IFLA_GRE_LOCAL */
	total += nla_total_size(4);	/* IFLA_GRE_REMOTE */
	total += nla_total_size(1);	/* IFLA_GRE_TTL */
	total += nla_total_size(1);	/* IFLA_GRE_TOS */
	total += nla_total_size(1);	/* IFLA_GRE_PMTUDISC */

	return total;
}
1409
1410static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1411{
1412 struct ip_tunnel *t = netdev_priv(dev);
1413 struct ip_tunnel_parm *p = &t->parms;
1414
1415 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1416 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1417 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1418 NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
1419 NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
1420 NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
1421 NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
1422 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1423 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1424 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1425
1426 return 0;
1427
1428nla_put_failure:
1429 return -EMSGSIZE;
1430}
1431
1432static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1433 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1434 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1435 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1436 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1437 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1438 [IFLA_GRE_LOCAL] = { .len = 4 },
1439 [IFLA_GRE_REMOTE] = { .len = 4 },
1440 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1441 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1442 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1443};
1444
1445static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1446 .kind = "gre",
1447 .maxtype = IFLA_GRE_MAX,
1448 .policy = ipgre_policy,
1449 .priv_size = sizeof(struct ip_tunnel),
1450 .setup = ipgre_tunnel_setup,
1451 .validate = ipgre_tunnel_validate,
1452 .newlink = ipgre_newlink,
1453 .changelink = ipgre_changelink,
1454 .get_size = ipgre_get_size,
1455 .fill_info = ipgre_fill_info,
1456};
1457
1da177e4
LT
/*
 *	And now the module code and kernel interface.
 */
1461
1462static int __init ipgre_init(void)
1463{
1464 int err;
1465
1466 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1467
1468 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1469 printk(KERN_INFO "ipgre init: can't add protocol\n");
1470 return -EAGAIN;
1471 }
1472
59a4c759
PE
1473 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1474 if (err < 0)
c19e654d 1475 goto gen_device_failed;
7daa0004 1476
c19e654d
HX
1477 err = rtnl_link_register(&ipgre_link_ops);
1478 if (err < 0)
1479 goto rtnl_link_failed;
1480
1481out:
1da177e4 1482 return err;
c19e654d
HX
1483
1484rtnl_link_failed:
1485 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1486gen_device_failed:
1487 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1488 goto out;
1da177e4
LT
1489}
1490
db44575f 1491static void __exit ipgre_fini(void)
1da177e4 1492{
c19e654d
HX
1493 rtnl_link_unregister(&ipgre_link_ops);
1494 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1da177e4
LT
1495 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1496 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1da177e4
LT
1497}
1498
1499module_init(ipgre_init);
1500module_exit(ipgre_fini);
1501MODULE_LICENSE("GPL");
c19e654d 1502MODULE_ALIAS("rtnl-link-gre");