]> bbs.cooldavid.org Git - net-next-2.6.git/blame_incremental - net/ipv4/ip_gre.c
gre: Add Transparent Ethernet Bridging
[net-next-2.6.git] / net / ipv4 / ip_gre.c
... / ...
CommitLineData
1/*
2 * Linux NET3: GRE over IP protocol decoder.
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/capability.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
30#include <linux/etherdevice.h>
31#include <linux/if_ether.h>
32
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
45#include <net/rtnetlink.h>
46
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
 66 * solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
68
69 Current solution: t->recursion lock breaks dead loops. It looks
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
 94 taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
125static int ipgre_tunnel_bind_dev(struct net_device *dev);
126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
129static int ipgre_fb_tunnel_init(struct net_device *dev);
130
/* Number of buckets in each per-type tunnel hash table (power of two). */
#define HASH_SIZE 16

/* Key into the per-namespace generic storage; set at module init. */
static int ipgre_net_id;

/* Per-network-namespace GRE state. */
struct ipgre_net {
	/* tunnels[prio][bucket]: four singly-linked hash tables, indexed
	 * by match specificity (see the comment above HASH()):
	 * 3=(remote,local), 2=(remote,*), 1=(*,local), 0=(*,*). */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* The always-present "gre0" fallback device (no src/dst/key). */
	struct net_device *fb_tunnel_dev;
};
139
140/* Tunnel hash table */
141
142/*
143 4 hash tables:
144
145 3: (remote,local)
146 2: (remote,*)
147 1: (*,local)
148 0: (*,*)
149
150 We require exact key match i.e. if a key is present in packet
151 it will match only tunnel with the same key; if it is not present,
152 it will match only keyless tunnel.
153
 154 All keyless packets, if not matching any configured keyless tunnel,
155 will match fallback tunnel.
156 */
157
/* Fold a 32-bit address (or key) into a 4-bit bucket index. */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* Convenience aliases for the four hash tables in struct ipgre_net,
 * named by what they match: r=remote, l=local, wc=wildcard. */
#define tunnels_r_l tunnels[3]
#define tunnels_r tunnels[2]
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]

/* Protects all four hash tables: readers are the packet paths,
 * writers are link/unlink under RTNL. */
static DEFINE_RWLOCK(ipgre_lock);
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
/*
 * Given src, dst and key, find appropriate for input tunnel.
 *
 * Scans the four hash tables from most to least specific match
 * ((remote,local) -> (remote,*) -> (*,local) -> (*,*)).  The key must
 * match exactly in every case.  A tunnel of the exact device type
 * (ARPHRD_ETHER for ETH_P_TEB payloads, ARPHRD_IPGRE otherwise) wins
 * immediately; the first ARPHRD_IPGRE tunnel found is remembered in @t2
 * as a fallback when the payload asked for an Ethernet tunnel but only
 * a plain GRE tunnel matches.  Finally the namespace's fallback device
 * catches anything else, if it is up.
 *
 * Called with ipgre_lock held for reading.
 */
static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
					      __be32 remote, __be32 local,
					      __be32 key, __be16 gre_proto)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t;
	struct ip_tunnel *t2 = NULL;	/* best non-exact-type match so far */
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;

	/* 1) exact (remote,local) match */
	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
				if (t->dev->type == dev_type)
					return t;
				if (t->dev->type == ARPHRD_IPGRE && !t2)
					t2 = t;
			}
		}
	}

	/* 2) (remote,*) match */
	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
		if (remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
				if (t->dev->type == dev_type)
					return t;
				if (t->dev->type == ARPHRD_IPGRE && !t2)
					t2 = t;
			}
		}
	}

	/* 3) (*,local) match — a multicast destination also counts as
	 * "local" for tunnels configured with that multicast daddr. */
	for (t = ign->tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr ||
		     (local == t->parms.iph.daddr &&
		      ipv4_is_multicast(local))) {
			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
				if (t->dev->type == dev_type)
					return t;
				if (t->dev->type == ARPHRD_IPGRE && !t2)
					t2 = t;
			}
		}
	}

	/* 4) wildcard (*,*) match */
	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
		if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
			if (t->dev->type == dev_type)
				return t;
			if (t->dev->type == ARPHRD_IPGRE && !t2)
				t2 = t;
		}
	}

	if (t2)
		return t2;

	/* Last resort: the namespace fallback device, if administratively up. */
	if (ign->fb_tunnel_dev->flags&IFF_UP)
		return netdev_priv(ign->fb_tunnel_dev);
	return NULL;
}
232
233static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
234 struct ip_tunnel_parm *parms)
235{
236 __be32 remote = parms->iph.daddr;
237 __be32 local = parms->iph.saddr;
238 __be32 key = parms->i_key;
239 unsigned h = HASH(key);
240 int prio = 0;
241
242 if (local)
243 prio |= 1;
244 if (remote && !ipv4_is_multicast(remote)) {
245 prio |= 2;
246 h ^= HASH(remote);
247 }
248
249 return &ign->tunnels[prio][h];
250}
251
/* Bucket head for an existing tunnel: just dispatch on its parameters. */
static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}
257
/*
 * Insert @t at the head of its hash bucket.
 *
 * t->next is assigned before taking the write lock: @t is not yet
 * visible to readers, and bucket heads only change under RTNL, so only
 * the store to *tp needs to be serialized against readers.
 */
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}
267
/*
 * Remove @t from its hash bucket, if present.
 *
 * The list walk itself is safe without the lock (modifications happen
 * under RTNL); only the unlinking store is done under the write lock
 * so concurrent readers never see a half-updated chain.
 */
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipgre_lock);
			*tp = t->next;
			write_unlock_bh(&ipgre_lock);
			break;
		}
	}
}
281
282static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
283 struct ip_tunnel_parm *parms,
284 int type)
285{
286 __be32 remote = parms->iph.daddr;
287 __be32 local = parms->iph.saddr;
288 __be32 key = parms->i_key;
289 struct ip_tunnel *t, **tp;
290 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
291
292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
293 if (local == t->parms.iph.saddr &&
294 remote == t->parms.iph.daddr &&
295 key == t->parms.i_key &&
296 type == t->dev->type)
297 break;
298
299 return t;
300}
301
302static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
303 struct ip_tunnel_parm *parms, int create)
304{
305 struct ip_tunnel *t, *nt;
306 struct net_device *dev;
307 char name[IFNAMSIZ];
308 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
309
310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
311 if (t || !create)
312 return t;
313
314 if (parms->name[0])
315 strlcpy(name, parms->name, IFNAMSIZ);
316 else
317 sprintf(name, "gre%%d");
318
319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
320 if (!dev)
321 return NULL;
322
323 dev_net_set(dev, net);
324
325 if (strchr(name, '%')) {
326 if (dev_alloc_name(dev, name) < 0)
327 goto failed_free;
328 }
329
330 nt = netdev_priv(dev);
331 nt->parms = *parms;
332 dev->rtnl_link_ops = &ipgre_link_ops;
333
334 dev->mtu = ipgre_tunnel_bind_dev(dev);
335
336 if (register_netdevice(dev) < 0)
337 goto failed_free;
338
339 dev_hold(dev);
340 ipgre_tunnel_link(ign, nt);
341 return nt;
342
343failed_free:
344 free_netdev(dev);
345 return NULL;
346}
347
/* Device teardown: unhash the tunnel and drop the reference taken in
 * ipgre_tunnel_locate()/ipgre_fb_tunnel_init(). */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
356
357
/*
 * ICMP error handler for outgoing GRE packets: @skb holds the ICMP
 * message whose payload is our (truncated) original IP+GRE header.
 * We only record an error-burst count/time on the matching tunnel;
 * PMTU soft state is handled by the IP core.
 */
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	/* Quoted original IP header, then the GRE header right after it. */
	struct iphdr *iph = (struct iphdr*)skb->data;
	__be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;	/* IP hdr + basic GRE hdr */
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		/* Account for optional checksum/key words so we can locate
		 * the key at the end of the header below. */
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	/* Quoted header is ours going *out*, so daddr/saddr are swapped
	 * relative to received traffic; the key (if any) is the last
	 * 32-bit word of the computed GRE header length. */
	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	/* NBMA-style tunnels with inherited TTL expect TTL expiry. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors inside a burst window; restart the count when the
	 * previous error is older than IPTUNNEL_ERR_TIMEO. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
}
446
447static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
448{
449 if (INET_ECN_is_ce(iph->tos)) {
450 if (skb->protocol == htons(ETH_P_IP)) {
451 IP_ECN_set_ce(ip_hdr(skb));
452 } else if (skb->protocol == htons(ETH_P_IPV6)) {
453 IP6_ECN_set_ce(ipv6_hdr(skb));
454 }
455 }
456}
457
458static inline u8
459ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
460{
461 u8 inner = 0;
462 if (skb->protocol == htons(ETH_P_IP))
463 inner = old_iph->tos;
464 else if (skb->protocol == htons(ETH_P_IPV6))
465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
466 return INET_ECN_encapsulate(tos, inner);
467}
468
/*
 * GRE receive handler (registered as the IPPROTO_GRE net_protocol).
 * Parses the GRE header, finds the owning tunnel, validates checksum
 * and sequence numbers, strips encapsulation and re-injects the inner
 * packet.  Always consumes @skb and returns 0.
 */
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;	/* basic GRE header: flags + protocol */
	__be16 gre_proto;

	/* 16 = basic GRE header plus maximal optional fields we touch. */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			/* Verify the GRE checksum over the whole packet;
			 * a zero folded hw checksum means "valid". */
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			/* Not an IPv4 header at the payload start ->
			 * WCCPv2 redirect header present, skip it. */
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb->rtable->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* Checksum policy: a failed checksum, or a missing checksum
		 * on a tunnel that requires one, is an rx CRC error. */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		/* In-order delivery enforcement for GRE_SEQ tunnels;
		 * signed diff handles sequence-number wraparound. */
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			/* Re-read after the pull; strip the inner Ethernet
			 * header for TEB (gretap-style) tunnels. */
			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		stats->rx_packets++;
		stats->rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return(0);
	}
	/* No tunnel wants this packet: behave like a closed port. */
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return(0);
}
605
/*
 * Transmit path: encapsulate @skb in IP+GRE and send it via the route
 * to the tunnel endpoint.  Handles NBMA (no fixed daddr) tunnels, TOS
 * inheritance, PMTU/DF propagation for IPv4 and IPv6 payloads, optional
 * GRE key/checksum/sequence fields, and local dead-loop breaking via
 * tunnel->recursion.  Always consumes @skb and returns 0.
 */
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->dev->stats;
	struct iphdr *old_iph = ip_hdr(skb);
	struct iphdr *tiph;
	u8 tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int gre_hlen;
	__be32 dst;
	int mtu;

	/* Break local encapsulation loops (see file-head comment §1). */
	if (tunnel->recursion++) {
		stats->collisions++;
		goto tx_error;
	}

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	/* With header_ops on a plain GRE device the outer header was
	 * already built by ipgre_header() and sits at skb->data. */
	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		gre_hlen = 0;
		tiph = (struct iphdr*)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb->dst == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}

		/* Derive the endpoint from the payload's own routing:
		 * IPv4 next-hop gateway, or an IPv4-compatible IPv6
		 * neighbour address. */
		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb->rtable;
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb->dst->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr*)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	/* TOS bit 0 set means "inherit TOS from the inner IPv4 header". */
	tos = tiph->tos;
	if (tos&1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		tos &= ~1;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	/* Routing back to ourselves would loop forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		/* DF forced: payload MTU is the path MTU minus our overhead. */
		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		/* Propagate the inner DF bit outward; reject oversized
		 * DF packets with FRAG_NEEDED so PMTU discovery works. */
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;

		/* Record the reduced MTU on host routes (or explicitly
		 * configured point-to-point tunnels) for IPv6 PMTU. */
		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb->dst->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	/* During an error burst recorded by ipgre_err(), proactively
	 * report link failure instead of blindly retransmitting. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

	/* Ensure private, writable headroom before prepending headers. */
	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_GRE;
	iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	/* TTL 0 means "inherit from inner packet" (or route metric). */
	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	/* GRE header: flags, then protocol (TEB for Ethernet tunnels). */
	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

	/* Optional fields are laid out seq, key, csum from the header
	 * end backwards, hence the decrementing pointer. */
	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
843
844static int ipgre_tunnel_bind_dev(struct net_device *dev)
845{
846 struct net_device *tdev = NULL;
847 struct ip_tunnel *tunnel;
848 struct iphdr *iph;
849 int hlen = LL_MAX_HEADER;
850 int mtu = ETH_DATA_LEN;
851 int addend = sizeof(struct iphdr) + 4;
852
853 tunnel = netdev_priv(dev);
854 iph = &tunnel->parms.iph;
855
856 /* Guess output device to choose reasonable mtu and needed_headroom */
857
858 if (iph->daddr) {
859 struct flowi fl = { .oif = tunnel->parms.link,
860 .nl_u = { .ip4_u =
861 { .daddr = iph->daddr,
862 .saddr = iph->saddr,
863 .tos = RT_TOS(iph->tos) } },
864 .proto = IPPROTO_GRE };
865 struct rtable *rt;
866 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
867 tdev = rt->u.dst.dev;
868 ip_rt_put(rt);
869 }
870
871 if (dev->type != ARPHRD_ETHER)
872 dev->flags |= IFF_POINTOPOINT;
873 }
874
875 if (!tdev && tunnel->parms.link)
876 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
877
878 if (tdev) {
879 hlen = tdev->hard_header_len + tdev->needed_headroom;
880 mtu = tdev->mtu;
881 }
882 dev->iflink = tunnel->parms.link;
883
884 /* Precalculate GRE options length */
885 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
886 if (tunnel->parms.o_flags&GRE_CSUM)
887 addend += 4;
888 if (tunnel->parms.o_flags&GRE_KEY)
889 addend += 4;
890 if (tunnel->parms.o_flags&GRE_SEQ)
891 addend += 4;
892 }
893 dev->needed_headroom = addend + hlen;
894 mtu -= dev->hard_header_len - addend;
895
896 if (mtu < 68)
897 mtu = 68;
898
899 tunnel->hlen = addend;
900
901 return mtu;
902}
903
/*
 * ioctl handler for SIOC{GET,ADD,CHG,DEL}TUNNEL.
 *
 * GET: copy the tunnel parameters to userspace (the fallback device may
 *      be used to query any tunnel by parameters).
 * ADD/CHG: validate parameters, create or reconfigure a tunnel
 *      (CAP_NET_ADMIN required); reconfiguration re-hashes the tunnel
 *      when its addresses/keys change.
 * DEL: unregister a tunnel device (the fallback device itself cannot
 *      be deleted).
 * Runs under RTNL.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Must describe a GRE-in-IPv4 header with no options and
		 * no unsupported GRE flags (version/routing). */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		/* Fixed TTL implies forced DF (see file-head comment §2). */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Ignore key values when keying is not requested. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags=0;

				t = netdev_priv(dev);

				/* The new addresses must not change the
				 * device's broadcast/p-t-p character. */
				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash under the new addresses/keys. */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* Never delete the fallback device itself. */
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1033
1034static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1035{
1036 struct ip_tunnel *tunnel = netdev_priv(dev);
1037 if (new_mtu < 68 ||
1038 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1039 return -EINVAL;
1040 dev->mtu = new_mtu;
1041 return 0;
1042}
1043
1044/* Nice toy. Unfortunately, useless in real life :-)
1045 It allows to construct virtual multiprotocol broadcast "LAN"
1046 over the Internet, provided multicast routing is tuned.
1047
1048
1049 I have no idea was this bicycle invented before me,
1050 so that I had to set ARPHRD_IPGRE to a random value.
1051 I have an impression, that Cisco could make something similar,
1052 but this feature is apparently missing in IOS<=11.2(8).
1053
1054 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1055 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1056
1057 ping -t 255 224.66.66.66
1058
1059 If nobody answers, mbone does not work.
1060
1061 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1062 ip addr add 10.66.66.<somewhat>/24 dev Universe
1063 ifconfig Universe up
1064 ifconfig Universe add fe80::<Your_real_addr>/10
1065 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1066 ftp 10.66.66.66
1067 ...
1068 ftp fec0:6666:6666::193.233.7.65
1069 ...
1070
1071 */
1072
1073static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1074 unsigned short type,
1075 const void *daddr, const void *saddr, unsigned len)
1076{
1077 struct ip_tunnel *t = netdev_priv(dev);
1078 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1079 __be16 *p = (__be16*)(iph+1);
1080
1081 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1082 p[0] = t->parms.o_flags;
1083 p[1] = htons(type);
1084
1085 /*
1086 * Set the source hardware address.
1087 */
1088
1089 if (saddr)
1090 memcpy(&iph->saddr, saddr, 4);
1091
1092 if (daddr) {
1093 memcpy(&iph->daddr, daddr, 4);
1094 return t->hlen;
1095 }
1096 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1097 return t->hlen;
1098
1099 return -t->hlen;
1100}
1101
1102static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1103{
1104 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1105 memcpy(haddr, &iph->saddr, 4);
1106 return 4;
1107}
1108
/* Link-layer header operations for NBMA/broadcast-capable GRE devices. */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
1113
1114#ifdef CONFIG_NET_IPGRE_BROADCAST
/*
 * ndo_open for broadcast GRE tunnels: when the endpoint is a multicast
 * group, resolve the egress device via routing and join the group on
 * it, remembering the ifindex in t->mlink for ipgre_close().
 * Runs under RTNL.
 */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		/* Note: @dev is repointed at the *egress* device here. */
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
1138
1139static int ipgre_close(struct net_device *dev)
1140{
1141 struct ip_tunnel *t = netdev_priv(dev);
1142 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1143 struct in_device *in_dev;
1144 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1145 if (in_dev) {
1146 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1147 in_dev_put(in_dev);
1148 }
1149 }
1150 return 0;
1151}
1152
1153#endif
1154
/* alloc_netdev() setup callback: wire up the device operations and
 * defaults for a plain (non-Ethernet) GRE tunnel device. */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->init		= ipgre_tunnel_init;
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor 	= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	/* Worst-case headroom / MTU before bind_dev refines them:
	 * link header + outer IP + basic GRE. */
	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* "hardware" addr = IPv4 address */
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
1172
/* dev->init for regular GRE tunnels: record the owning device, seed
 * dev_addr/broadcast from the tunnel endpoints, and choose header_ops
 * and open/stop handlers based on the destination address type.
 *
 * Returns -EINVAL for a multicast destination with no local address.
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* 4-byte "hardware" addresses are the tunnel's IPv4 endpoints. */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Broadcast mode requires a known local address
			 * to join the multicast group.
			 */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		/* No fixed destination (NBMA mode): the per-packet
		 * destination is supplied through header_ops.
		 */
		dev->header_ops = &ipgre_header_ops;

	return 0;
}
1203
/* dev->init for the per-namespace fallback device "gre0", which is
 * registered in the wildcard hash slot and receives GRE traffic that
 * matches no configured tunnel.
 */
static int ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* Template outer IP header for transmitted packets. */
	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;	/* IP + basic GRE */

	/* Hold a reference for the wildcard hash-table slot. */
	dev_hold(dev);
	ign->tunnels_wc[0]	= tunnel;
	return 0;
}
1222
1223
/* IPPROTO_GRE receive hooks: ipgre_rcv() for incoming packets,
 * ipgre_err() for ICMP errors on our encapsulated traffic.
 */
static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};
1229
1230static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1231{
1232 int prio;
1233
1234 for (prio = 0; prio < 4; prio++) {
1235 int h;
1236 for (h = 0; h < HASH_SIZE; h++) {
1237 struct ip_tunnel *t;
1238 while ((t = ign->tunnels[prio][h]) != NULL)
1239 unregister_netdevice(t->dev);
1240 }
1241 }
1242}
1243
/* Per-namespace init: allocate the ipgre_net state, attach it to the
 * namespace's generic pointer array, and create/register the fallback
 * device "gre0". Errors unwind in reverse order of setup.
 */
static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}

	/* The fallback device gets its own init (wildcard slot) and must
	 * live in this namespace before registration.
	 */
	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
	dev_net_set(ign->fb_tunnel_dev, net);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}
1283
1284static void ipgre_exit_net(struct net *net)
1285{
1286 struct ipgre_net *ign;
1287
1288 ign = net_generic(net, ipgre_net_id);
1289 rtnl_lock();
1290 ipgre_destroy_tunnels(ign);
1291 rtnl_unlock();
1292 kfree(ign);
1293}
1294
/* Per-network-namespace lifecycle hooks for the GRE driver. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};
1299
1300static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1301{
1302 __be16 flags;
1303
1304 if (!data)
1305 return 0;
1306
1307 flags = 0;
1308 if (data[IFLA_GRE_IFLAGS])
1309 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1310 if (data[IFLA_GRE_OFLAGS])
1311 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1312 if (flags & (GRE_VERSION|GRE_ROUTING))
1313 return -EINVAL;
1314
1315 return 0;
1316}
1317
1318static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1319{
1320 __be32 daddr;
1321
1322 if (tb[IFLA_ADDRESS]) {
1323 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1324 return -EINVAL;
1325 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1326 return -EADDRNOTAVAIL;
1327 }
1328
1329 if (!data)
1330 goto out;
1331
1332 if (data[IFLA_GRE_REMOTE]) {
1333 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1334 if (!daddr)
1335 return -EINVAL;
1336 }
1337
1338out:
1339 return ipgre_tunnel_validate(tb, data);
1340}
1341
1342static void ipgre_netlink_parms(struct nlattr *data[],
1343 struct ip_tunnel_parm *parms)
1344{
1345 memset(parms, 0, sizeof(parms));
1346
1347 parms->iph.protocol = IPPROTO_GRE;
1348
1349 if (!data)
1350 return;
1351
1352 if (data[IFLA_GRE_LINK])
1353 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1354
1355 if (data[IFLA_GRE_IFLAGS])
1356 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1357
1358 if (data[IFLA_GRE_OFLAGS])
1359 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1360
1361 if (data[IFLA_GRE_IKEY])
1362 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1363
1364 if (data[IFLA_GRE_OKEY])
1365 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1366
1367 if (data[IFLA_GRE_LOCAL])
1368 memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
1369
1370 if (data[IFLA_GRE_REMOTE])
1371 memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1372
1373 if (data[IFLA_GRE_TTL])
1374 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1375
1376 if (data[IFLA_GRE_TOS])
1377 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1378
1379 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1380 parms->iph.frag_off = htons(IP_DF);
1381}
1382
1383static int ipgre_tap_init(struct net_device *dev)
1384{
1385 struct ip_tunnel *tunnel;
1386
1387 tunnel = netdev_priv(dev);
1388
1389 tunnel->dev = dev;
1390 strcpy(tunnel->parms.name, dev->name);
1391
1392 ipgre_tunnel_bind_dev(dev);
1393
1394 return 0;
1395}
1396
/* netdev setup callback for "gretap" devices: start from a standard
 * Ethernet device and install the GRE driver entry points.
 */
static void ipgre_tap_setup(struct net_device *dev)
{

	ether_setup(dev);

	dev->init		= ipgre_tap_init;
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor 	= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->iflink		= 0;
	/* Tunnel devices must not move between network namespaces. */
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
1411
/* rtnl_link_ops newlink handler (shared by "gre" and "gretap"):
 * parse attributes into the private area, refuse duplicates, pick a
 * random MAC for gretap when none was given, bind to the underlying
 * device, then register and hash-link the tunnel.
 *
 * Returns 0 on success, -EEXIST for a duplicate tunnel, or the error
 * from register_netdevice(). Caller holds the RTNL lock.
 */
static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	/* gretap without an explicit MAC gets a random one. */
	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		random_ether_addr(dev->dev_addr);

	/* Only apply the computed MTU when the user did not set one. */
	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* Reference held for the tunnel hash table. */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}
1444
/* rtnl_link_ops changelink handler: update an existing tunnel's
 * parameters. The fallback device cannot be changed. If the new
 * parameters match another device, fail with -EEXIST; otherwise
 * re-hash this device under its new endpoints/key, refusing changes
 * that would flip it between point-to-point and broadcast mode.
 * Caller holds the RTNL lock.
 */
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		/* Parameters already in use — only OK if it's us. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		unsigned nflags = 0;

		t = nt;

		if (ipv4_is_multicast(p.iph.daddr))
			nflags = IFF_BROADCAST;
		else if (p.iph.daddr)
			nflags = IFF_POINTOPOINT;

		/* The device mode (p2p/broadcast) cannot change after
		 * creation; the open/stop handlers depend on it.
		 */
		if ((dev->flags ^ nflags) &
		    (IFF_POINTOPOINT | IFF_BROADCAST))
			return -EINVAL;

		/* Endpoints/i_key determine the hash bucket: unlink,
		 * update, then relink.
		 */
		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		memcpy(dev->dev_addr, &p.iph.saddr, 4);
		memcpy(dev->broadcast, &p.iph.daddr, 4);
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	/* Non-hashed parameters can always be updated in place. */
	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		/* Re-bind to the new underlying device; keep a
		 * user-specified MTU.
		 */
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
1504
1505static size_t ipgre_get_size(const struct net_device *dev)
1506{
1507 return
1508 /* IFLA_GRE_LINK */
1509 nla_total_size(4) +
1510 /* IFLA_GRE_IFLAGS */
1511 nla_total_size(2) +
1512 /* IFLA_GRE_OFLAGS */
1513 nla_total_size(2) +
1514 /* IFLA_GRE_IKEY */
1515 nla_total_size(4) +
1516 /* IFLA_GRE_OKEY */
1517 nla_total_size(4) +
1518 /* IFLA_GRE_LOCAL */
1519 nla_total_size(4) +
1520 /* IFLA_GRE_REMOTE */
1521 nla_total_size(4) +
1522 /* IFLA_GRE_TTL */
1523 nla_total_size(1) +
1524 /* IFLA_GRE_TOS */
1525 nla_total_size(1) +
1526 /* IFLA_GRE_PMTUDISC */
1527 nla_total_size(1) +
1528 0;
1529}
1530
1531static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1532{
1533 struct ip_tunnel *t = netdev_priv(dev);
1534 struct ip_tunnel_parm *p = &t->parms;
1535
1536 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1537 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1538 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1539 NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
1540 NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
1541 NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
1542 NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
1543 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1544 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1545 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1546
1547 return 0;
1548
1549nla_put_failure:
1550 return -EMSGSIZE;
1551}
1552
/* Netlink attribute policy for IFLA_GRE_*; LOCAL/REMOTE are raw
 * 4-byte IPv4 addresses.
 */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = 4 },
	[IFLA_GRE_REMOTE]	= { .len = 4 },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
1565
/* rtnetlink ops for plain IP-over-GRE ("ip link add ... type gre"). */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1578
/* rtnetlink ops for Ethernet-over-GRE ("ip link add ... type gretap");
 * shares everything with "gre" except setup/validate.
 */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1591
1592/*
1593 * And now the modules code and kernel interface.
1594 */
1595
/* Module init: hook into the IPPROTO_GRE receive path, register the
 * per-namespace state, then the two rtnetlink link types. Failure at
 * any stage unwinds the earlier registrations in reverse order.
 */
static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
		/* Another handler already owns IPPROTO_GRE. */
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		return -EAGAIN;
	}

	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
	if (err < 0)
		goto gen_device_failed;

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

out:
	return err;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
gen_device_failed:
	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
	goto out;
}
1630
/* Module exit: tear down in the reverse order of ipgre_init() —
 * link types first, then per-namespace state, then the protocol hook.
 */
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");
}
1639
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Allow auto-loading via "ip link add ... type gre" / "type gretap". */
MODULE_ALIAS("rtnl-link-gre");
MODULE_ALIAS("rtnl-link-gretap");