/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in EVERY
   skb, even if no tunneling is used.

   Current solution: the HARD_TX_LOCK lock breaks dead loops.



   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. That is difficult or even impossible, especially
   taking fragmentation into account. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us; we made
   all that we could make. Even if it is your gated who injected
   the fatal route to the network, even if it were you who configured
   the fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally modular.
   We could extract the common parts (hash table, ioctl etc.)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE	16

static int ipgre_net_id __read_mostly;
struct ipgre_net {
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless tunnel,
   will match the fallback tunnel.
 */

#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
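/* Worked example: HASH() folds a 32-bit value onto 4 bits.  For
 * addr = 0x0a000001 (10.0.0.1): 0x0a000001 ^ (0x0a000001 >> 4)
 * = 0x0a000001 ^ 0x00a00000 = 0x0aa00001, and 0x0aa00001 & 0xF = 1,
 * so such an endpoint hashes to chain 1 of its table.
 */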

#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/* Given src, dst and key, find the appropriate tunnel for input. */

static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
					     __be32 remote, __be32 local,
					     __be32 key, __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int score, cand_score = 4;

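	/* An exact match on (link, type) scores 0 and short-circuits the
	 * scan; a link mismatch sets bit 0, a device type mismatch bit 1,
	 * and the lowest-scoring candidate seen across the four tables wins.
	 */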
	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}

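/* Pick the chain for a set of tunnel parameters: prio selects one of the
 * four tables above according to which of (remote, local) are set (a
 * multicast remote counts as wildcard), and h indexes the 16 chains in it.
 */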
static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
					 struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &ign->tunnels[prio][h];
}

static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
					      struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}

static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	spin_lock_bh(&ipgre_lock);
	t->next = *tp;
	rcu_assign_pointer(*tp, t);
	spin_unlock_bh(&ipgre_lock);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			spin_lock_bh(&ipgre_lock);
			*tp = t->next;
			spin_unlock_bh(&ipgre_lock);
			break;
		}
	}
}

static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t, **tp;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;

	return t;
}

static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
					     struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "gre%%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}

static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key to the third word
   in the GRE header. It makes it impossible to maintain even soft
   state for keyed GRE tunnels with checksum enabled. Tell them
   "thank you".

   Well, I wonder, rfc1812 was written by a Cisco employee;
   why the hell do these idiots break the standards established
   by themselves???
 */

	struct iphdr *iph = (struct iphdr *)skb->data;
	__be16 *p = (__be16 *)(skb->data + (iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	rcu_read_lock();
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
}

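/* ECN handling: on decapsulation, copy a CE mark from the outer header
 * into the inner IPv4/IPv6 packet; on encapsulation, combine the outer
 * TOS with the inner DSCP/ECN field via INET_ECN_encapsulate().
 */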
static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
			IP_ECN_set_ce(ip_hdr(skb));
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
	}
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}

static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8 *h;
	__be16 flags;
	__sum16 csum = 0;
	__be32 key = 0;
	u32 seqno = 0;
	struct ip_tunnel *tunnel;
	int offset = 4;
	__be16 gre_proto;

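	/* GRE header: 2 flag bytes and 2 protocol bytes, followed by the
	 * optional 4-byte fields in wire order: checksum (+reserved), key,
	 * sequence number.  16 bytes covers the largest header we accept.
	 */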
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	rcu_read_lock();
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb_rtable(skb)->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		rcu_read_unlock();
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	rcu_read_unlock();
drop_nolock:
	kfree_skb(skb);
	return 0;
}

static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &dev->stats;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct iphdr *old_iph = ip_hdr(skb);
	struct iphdr *tiph;
	u8 tos;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	struct iphdr *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int gre_hlen;
	__be32 dst;
	int mtu;

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		gre_hlen = 0;
		tiph = (struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */
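		/* No fixed destination: take the outer IPv4 destination
		 * from the inner packet's route (the IPv4 gateway, or an
		 * IPv4-compatible IPv6 neighbour address below).
		 */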

		if (skb_dst(skb) == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb_dst(skb)->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

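	/* A configured TOS of 1 means "inherit": take the TOS from the
	 * inner IPv4 header, or 0 for other payloads.
	 */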
	tos = tiph->tos;
	if (tos == 1) {
		tos = 0;
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (max_headroom > dev->needed_headroom)
			dev->needed_headroom = max_headroom;
		if (!new_skb) {
			ip_rt_put(rt);
			txq->tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb_reset_transport_header(skb);
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/*
	 *	Push down and install the outer IP header.
	 */

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_GRE;
	iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

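	/* The optional fields are filled back to front: ptr starts at the
	 * last 4-byte slot of the GRE header, so the sequence number (if
	 * any) ends up last on the wire, preceded by key, then checksum.
	 */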
	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;

	return mtu;
}

static int
ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so that I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */

static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16 *)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0] = t->parms.o_flags;
	p[1] = htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
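
	/* Per the header_ops->create convention, a negative length means
	 * the header cannot be completed yet (no destination known).
	 */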
	if (iph->daddr)
		return t->hlen;

	return -t->hlen;
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}

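/* The fallback device "gre0" is keyless with wildcard addresses, so it
 * sits on tunnels_wc chain 0 (HASH(0) == 0) and catches keyless packets
 * that match no configured tunnel.
 */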
static void ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version = 4;
	iph->protocol = IPPROTO_GRE;
	iph->ihl = 5;
	tunnel->hlen = sizeof(struct iphdr) + 4;

	dev_hold(dev);
	ign->tunnels_wc[0] = tunnel;
}

static const struct net_protocol ipgre_protocol = {
	.handler	= ipgre_rcv,
	.err_handler	= ipgre_err,
	.netns_ok	= 1,
};

static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t = ign->tunnels[prio][h];

			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = t->next;
			}
		}
	}
}

static int __net_init ipgre_init_net(struct net *net)
{
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int err;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					  ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	return err;
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;
	LIST_HEAD(list);

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ipgre_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

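	/* Path MTU discovery defaults to on: DF is set on the outer header
	 * unless userspace explicitly sends IFLA_GRE_PMTUDISC = 0.
	 */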
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}

static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	return 0;
}

static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->netdev_ops		= &ipgre_tap_netdev_ops;
	dev->destructor		= free_netdev;

	dev->iflink		= 0;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		random_ether_addr(dev->dev_addr);

	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned nflags = 0;

			if (ipv4_is_multicast(p.iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p.iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}

		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

/*
 *	And now the modules code and kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
	if (err < 0) {
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

out:
	return err;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
add_proto_failed:
	unregister_pernet_device(&ipgre_net_ops);
	goto out;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");