/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is the best
   solution, but it requires maintaining a new variable in EVERY
   skb, even if no tunneling is used.

   Current solution: the HARD_TX_LOCK lock breaks dead loops.



   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output would be
     even more informative. This idea appeared to be wrong: only Linux
     complies with rfc1812 now (yes, guys, Linux is the only true router
     now :-)); all other routers (at least, those in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, it is
   no solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us, we did
   all that we could. Even if it was your gated that injected the
   fatal route to the network, even if it was you who configured the
   fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally
   modular. We could extract the common parts (hash table, ioctl etc.)
   into a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE	16

static int ipgre_net_id;
struct ipgre_net {
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
 */

#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
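/* For example: with addr = 0x12345678, addr >> 4 = 0x01234567; only the
 * low nibble survives the mask, 0x8 ^ 0x7 = 0xf, so HASH() picks bucket 15.
 */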

#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/* Given src, dst and key, find the appropriate tunnel for the incoming packet. */

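/* Match order: most to least specific table. Within a table, score bit 0
 * flags a link (ifindex) mismatch and bit 1 a device-type mismatch; an
 * exact match (score 0) returns immediately, otherwise the lowest-scoring
 * candidate seen across all tables wins.
 */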
static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
					     __be32 remote, __be32 local,
					     __be32 key, __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int score, cand_score = 4;

	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}

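/* prio selects one of the four tables described above: bit 0 is set when
 * a local address is configured, bit 1 when a unicast remote is.
 * Multicast remotes fall into the (*,local) table, matching the lookup
 * above.
 */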
static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &ign->tunnels[prio][h];
}

static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}

static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	spin_lock_bh(&ipgre_lock);
	t->next = *tp;
	rcu_assign_pointer(*tp, t);
	spin_unlock_bh(&ipgre_lock);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			spin_lock_bh(&ipgre_lock);
			*tp = t->next;
			spin_unlock_bh(&ipgre_lock);
			break;
		}
	}
}

static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t, **tp;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;

	return t;
}

static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "gre%%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}


static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, the Cisco "wise men" put the GRE key in the third word
   of the GRE header. That makes it impossible to maintain even soft
   state for keyed GRE tunnels with enabled checksums. Tell them
   "thank you".

   Well, I wonder, rfc1812 was written by a Cisco employee;
   why the hell do these idiots break the standards established
   by themselves???
 */
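/* GRE header layout assumed below (RFC 1701/2890):
 *	2 bytes	flags + version
 *	2 bytes	protocol type
 *	4 bytes	checksum + reserved	(present if GRE_CSUM)
 *	4 bytes	key			(present if GRE_KEY)
 *	4 bytes	sequence number		(present if GRE_SEQ)
 */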

	struct iphdr *iph = (struct iphdr *)skb->data;
	__be16 *p = (__be16 *)(skb->data + (iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes were returned, a keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	rcu_read_lock();
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
	return;
}

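/* On decapsulation, propagate an outer CE (congestion experienced) mark
 * to the inner packet, for both IPv4 and IPv6 payloads.
 */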
static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
			IP_ECN_set_ce(ip_hdr(skb));
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
	}
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}

static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8 *h;
	__be16 flags;
	__sum16 csum = 0;
	__be32 key = 0;
	u32 seqno = 0;
	struct ip_tunnel *tunnel;
	int offset = 4;
	__be16 gre_proto;
	unsigned int len;

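	/* 16 bytes is the maximal GRE header: flags/version (2) +
	 * protocol type (2) + optional checksum (4), key (4) and
	 * sequence number (4).
	 */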
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	rcu_read_lock();
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb_rtable(skb)->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
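		/* Serial-number arithmetic: the difference is compared as
		 * signed so the check survives seqno wrap-around.
		 */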
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		len = skb->len;

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		stats->rx_packets++;
		stats->rx_bytes += len;
		skb->dev = tunnel->dev;
		skb_dst_drop(skb);
		nf_reset(skb);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		rcu_read_unlock();
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	rcu_read_unlock();
drop_nolock:
	kfree_skb(skb);
	return 0;
}

static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &dev->stats;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct iphdr *old_iph = ip_hdr(skb);
	struct iphdr *tiph;
	u8 tos;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	struct iphdr *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int gre_hlen;
	__be32 dst;
	int mtu;

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		gre_hlen = 0;
		tiph = (struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb_dst(skb)->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos == 1) {
		tos = 0;
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			txq->tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb_reset_transport_header(skb);
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/*
	 *	Push down and install the outer IP header.
	 */

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_GRE;
	iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

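	/* The GRE flags word and protocol type immediately follow the outer
	 * IP header; ETH_P_TEB marks transparent ethernet bridging for
	 * gretap devices.
	 */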
	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

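	/* Optional fields sit in csum, key, seq order after the base header;
	 * ptr starts at the last 4-byte word and walks backwards, so the
	 * checksum covering the whole GRE packet is filled in last.
	 */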
	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	/* Base overhead: outer IP header plus the 4 mandatory GRE bytes */
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;

	return mtu;
}

static int
ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

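/* 0xFFF8 is the maximal IP datagram size (65535) rounded down to a
 * multiple of 8, the granularity of fragment offsets.
 */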
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */

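/* hard_header create hook: builds the outer IP + GRE header in place.
 * A negative return length signals an incomplete header (destination not
 * yet known, as for NBMA tunnels), per the header_ops convention.
 */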
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16 *)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0] = t->parms.o_flags;
	p[1] = htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);

	if (daddr) {
		memcpy(&iph->daddr, daddr, 4);
		return t->hlen;
	}
	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
		return t->hlen;

	return -t->hlen;
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}

static void ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	dev_hold(dev);
	ign->tunnels_wc[0]	= tunnel;
}


static const struct net_protocol ipgre_protocol = {
	.handler	= ipgre_rcv,
	.err_handler	= ipgre_err,
	.netns_ok	= 1,
};

static void ipgre_destroy_tunnels(struct ipgre_net *ign)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = ign->tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					  ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}

static void ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign);
	rtnl_unlock();
	kfree(ign);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* Path MTU discovery (DF set) is the default unless explicitly disabled */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}

static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	return 0;
}

static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);

	/* Use the tap-specific ops (otherwise unreferenced above) so gretap
	 * devices get ipgre_tap_init and the ethernet address handlers.
	 */
	dev->netdev_ops		= &ipgre_tap_netdev_ops;
	dev->destructor		= free_netdev;

	dev->iflink		= 0;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		random_ether_addr(dev->dev_addr);

	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		unsigned nflags = 0;

		t = nt;

		if (ipv4_is_multicast(p.iph.daddr))
			nflags = IFF_BROADCAST;
		else if (p.iph.daddr)
			nflags = IFF_POINTOPOINT;

		if ((dev->flags ^ nflags) &
		    (IFF_POINTOPOINT | IFF_BROADCAST))
			return -EINVAL;

		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		memcpy(dev->dev_addr, &p.iph.saddr, 4);
		memcpy(dev->broadcast, &p.iph.daddr, 4);
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
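
/* With these ops registered, tunnels can also be managed from userspace
 * via rtnetlink; illustrative iproute2 usage (not part of this file):
 *
 *	ip link add gre1 type gre remote 10.0.0.2 local 10.0.0.1 ttl 64
 *	ip link add tap1 type gretap remote 10.0.0.2 local 10.0.0.1
 */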

/*
 *	And now the module code and kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		return -EAGAIN;
	}

	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
	if (err < 0)
		goto gen_device_failed;

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

out:
	return err;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
gen_device_failed:
	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
	goto out;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");