]> bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Merge branch 'for-patrick' of git://git.kernel.org/pub/scm/linux/kernel/git/horms...
author Patrick McHardy <kaber@trash.net>
Thu, 21 Oct 2010 14:25:51 +0000 (16:25 +0200)
committer Patrick McHardy <kaber@trash.net>
Thu, 21 Oct 2010 14:25:51 +0000 (16:25 +0200)
23 files changed:
include/linux/in6.h
include/linux/ipv6.h
include/linux/netfilter/xt_TPROXY.h
include/net/inet_hashtables.h
include/net/netfilter/ipv6/nf_defrag_ipv6.h [new file with mode: 0644]
include/net/netfilter/nf_tproxy_core.h
include/net/udp.h
net/dccp/ipv4.c
net/dccp/ipv6.c
net/ipv4/inet_hashtables.c
net/ipv4/tcp_ipv4.c
net/ipv6/af_inet6.c
net/ipv6/datagram.c
net/ipv6/ipv6_sockglue.c
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c [new file with mode: 0644]
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/netfilter/nf_tproxy_core.c
net/netfilter/xt_TPROXY.c
net/netfilter/xt_socket.c

index c4bf46f764bf34bffab67e6de3ec9c8ddee70b0e..097a34b55560a9d4434f61f78b85f5fbbef8e2e7 100644 (file)
@@ -268,6 +268,10 @@ struct in6_flowlabel_req {
 /* RFC5082: Generalized Ttl Security Mechanism */
 #define IPV6_MINHOPCOUNT               73
 
+#define IPV6_ORIGDSTADDR        74
+#define IPV6_RECVORIGDSTADDR    IPV6_ORIGDSTADDR
+#define IPV6_TRANSPARENT        75
+
 /*
  * Multicast Routing:
  * see include/linux/mroute6.h.
index e62683ba88e6824e72b3e8c81c7315998868f606..8e429d0e0405df6f50ebbfe9273d719f18facc57 100644 (file)
@@ -341,7 +341,9 @@ struct ipv6_pinfo {
                                odstopts:1,
                                 rxflow:1,
                                rxtclass:1,
-                               rxpmtu:1;
+                               rxpmtu:1,
+                               rxorigdstaddr:1;
+                               /* 2 bits hole */
                } bits;
                __u16           all;
        } rxopt;
index 152e8f97132b1a213271075c16c3ac0b2ce9895d..3f3d69361289ca4cecda2182f71d62e0b7137053 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef _XT_TPROXY_H_target
-#define _XT_TPROXY_H_target
+#ifndef _XT_TPROXY_H
+#define _XT_TPROXY_H
 
 /* TPROXY target is capable of marking the packet to perform
  * redirection. We can get rid of that whenever we get support for
@@ -11,4 +11,11 @@ struct xt_tproxy_target_info {
        __be16 lport;
 };
 
-#endif /* _XT_TPROXY_H_target */
+struct xt_tproxy_target_info_v1 {
+       u_int32_t mark_mask;
+       u_int32_t mark_value;
+       union nf_inet_addr laddr;
+       __be16 lport;
+};
+
+#endif /* _XT_TPROXY_H */
index 74358d1b3f439ba6f995ceb0c928730f8c180252..e9c2ed8af864b4fc197e8310551eb6f7ba71dd25 100644 (file)
@@ -245,7 +245,7 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk)
 }
 
 /* Caller must disable local BH processing. */
-extern void __inet_inherit_port(struct sock *sk, struct sock *child);
+extern int __inet_inherit_port(struct sock *sk, struct sock *child);
 
 extern void inet_put_port(struct sock *sk);
 
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
new file mode 100644 (file)
index 0000000..94dd54d
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _NF_DEFRAG_IPV6_H
+#define _NF_DEFRAG_IPV6_H
+
+extern void nf_defrag_ipv6_enable(void);
+
+#endif /* _NF_DEFRAG_IPV6_H */
index 208b46f4d6d2b2bfbf9b2439b783dbfa6a7a7a46..cd85b3bc8327219f1e036698c253034c27765d66 100644 (file)
 #include <linux/in.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
 #include <net/tcp.h>
 
+#define NFT_LOOKUP_ANY         0
+#define NFT_LOOKUP_LISTENER    1
+#define NFT_LOOKUP_ESTABLISHED 2
+
 /* look up and get a reference to a matching socket */
-extern struct sock *
+
+
+/* This function is used by the 'TPROXY' target and the 'socket'
+ * match. The following lookups are supported:
+ *
+ * Explicit TProxy target rule
+ * ===========================
+ *
+ * This is used when the user wants to intercept a connection matching
+ * an explicit iptables rule. In this case the sockets are assumed
+ * matching in preference order:
+ *
+ *   - match: if there's a fully established connection matching the
+ *     _packet_ tuple, it is returned, assuming the redirection
+ *     already took place and we process a packet belonging to an
+ *     established connection
+ *
+ *   - match: if there's a listening socket matching the redirection
+ *     (e.g. on-port & on-ip of the connection), it is returned,
+ *     regardless if it was bound to 0.0.0.0 or an explicit
+ *     address. The reasoning is that if there's an explicit rule, it
+ *     does not really matter if the listener is bound to an interface
+ *     or to 0. The user already stated that he wants redirection
+ *     (since he added the rule).
+ *
+ * "socket" match based redirection (no specific rule)
+ * ===================================================
+ *
+ * There are connections with dynamic endpoints (e.g. FTP data
+ * connection) that the user is unable to add explicit rules
+ * for. These are taken care of by a generic "socket" rule. It is
+ * assumed that the proxy application is trusted to open such
+ * connections without explicit iptables rule (except of course the
+ * generic 'socket' rule). In this case the following sockets are
+ * matched in preference order:
+ *
+ *   - match: if there's a fully established connection matching the
+ *     _packet_ tuple
+ *
+ *   - match: if there's a non-zero bound listener (possibly with a
+ *     non-local address). We don't accept zero-bound listeners, since
+ *     then local services could intercept traffic going through the
+ *     box.
+ *
+ * Please note that there's an overlap between what a TPROXY target
+ * and a socket match will match. Normally if you have both rules the
+ * "socket" match will be the first one, effectively all packets
+ * belonging to established connections going through that one.
+ */
+static inline struct sock *
 nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
                      const __be32 saddr, const __be32 daddr,
                      const __be16 sport, const __be16 dport,
-                     const struct net_device *in, bool listening);
+                     const struct net_device *in, int lookup_type)
+{
+       struct sock *sk;
+
+       /* look up socket */
+       switch (protocol) {
+       case IPPROTO_TCP:
+               switch (lookup_type) {
+               case NFT_LOOKUP_ANY:
+                       sk = __inet_lookup(net, &tcp_hashinfo,
+                                          saddr, sport, daddr, dport,
+                                          in->ifindex);
+                       break;
+               case NFT_LOOKUP_LISTENER:
+                       sk = inet_lookup_listener(net, &tcp_hashinfo,
+                                                   daddr, dport,
+                                                   in->ifindex);
+
+                       /* NOTE: we return listeners even if bound to
+                        * 0.0.0.0, those are filtered out in
+                        * xt_socket, since xt_TPROXY needs 0 bound
+                        * listeners too */
+
+                       break;
+               case NFT_LOOKUP_ESTABLISHED:
+                       sk = inet_lookup_established(net, &tcp_hashinfo,
+                                                   saddr, sport, daddr, dport,
+                                                   in->ifindex);
+                       break;
+               default:
+                       WARN_ON(1);
+                       sk = NULL;
+                       break;
+               }
+               break;
+       case IPPROTO_UDP:
+               sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+                                    in->ifindex);
+               if (sk && lookup_type != NFT_LOOKUP_ANY) {
+                       int connected = (sk->sk_state == TCP_ESTABLISHED);
+                       int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
+
+                       /* NOTE: we return listeners even if bound to
+                        * 0.0.0.0, those are filtered out in
+                        * xt_socket, since xt_TPROXY needs 0 bound
+                        * listeners too */
+                       if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+                           (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+                               sock_put(sk);
+                               sk = NULL;
+                       }
+               }
+               break;
+       default:
+               WARN_ON(1);
+               sk = NULL;
+       }
+
+       pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
+                protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
+
+       return sk;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static inline struct sock *
+nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+                     const struct in6_addr *saddr, const struct in6_addr *daddr,
+                     const __be16 sport, const __be16 dport,
+                     const struct net_device *in, int lookup_type)
+{
+       struct sock *sk;
+
+       /* look up socket */
+       switch (protocol) {
+       case IPPROTO_TCP:
+               switch (lookup_type) {
+               case NFT_LOOKUP_ANY:
+                       sk = inet6_lookup(net, &tcp_hashinfo,
+                                         saddr, sport, daddr, dport,
+                                         in->ifindex);
+                       break;
+               case NFT_LOOKUP_LISTENER:
+                       sk = inet6_lookup_listener(net, &tcp_hashinfo,
+                                                  daddr, ntohs(dport),
+                                                  in->ifindex);
+
+                       /* NOTE: we return listeners even if bound to
+                        * the unspecified address (::), those are
+                        * filtered out in xt_socket, since xt_TPROXY
+                        * needs 0 bound listeners too */
+
+                       break;
+               case NFT_LOOKUP_ESTABLISHED:
+                       sk = __inet6_lookup_established(net, &tcp_hashinfo,
+                                                       saddr, sport, daddr, ntohs(dport),
+                                                       in->ifindex);
+                       break;
+               default:
+                       WARN_ON(1);
+                       sk = NULL;
+                       break;
+               }
+               break;
+       case IPPROTO_UDP:
+               sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
+                                    in->ifindex);
+               if (sk && lookup_type != NFT_LOOKUP_ANY) {
+                       int connected = (sk->sk_state == TCP_ESTABLISHED);
+                       int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
+
+                       /* NOTE: we return listeners even if bound to
+                        * the unspecified address (::), those are
+                        * filtered out in xt_socket, since xt_TPROXY
+                        * needs 0 bound listeners too */
+                       if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+                           (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+                               sock_put(sk);
+                               sk = NULL;
+                       }
+               }
+               break;
+       default:
+               WARN_ON(1);
+               sk = NULL;
+       }
+
+       pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
+                protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
+
+       return sk;
+}
+#endif
 
 static inline void
 nf_tproxy_put_sock(struct sock *sk)
index a184d3496b1369deefd62aba376f04320f76a773..200b82848c9a3606b0076c3f8621fb63342af6a9 100644 (file)
@@ -183,6 +183,9 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
                                    __be32 daddr, __be16 dport,
                                    int dif);
+extern struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
+                                   const struct in6_addr *daddr, __be16 dport,
+                                   int dif);
 
 /*
  *     SNMP statistics for UDP and UDP-Lite
index d4a166f0f391d6bfccd85471b2616057d808c848..3f69ea1148291ce2e5ad4956f4c27931d8db7e8f 100644 (file)
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
        newsk = dccp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
-               goto exit;
+               goto exit_nonewsk;
 
        sk_setup_caps(newsk, dst);
 
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
        dccp_sync_mss(newsk, dst_mtu(dst));
 
+       if (__inet_inherit_port(sk, newsk) < 0) {
+               sock_put(newsk);
+               goto exit;
+       }
        __inet_hash_nolisten(newsk, NULL);
-       __inet_inherit_port(sk, newsk);
 
        return newsk;
 
 exit_overflow:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+exit_nonewsk:
+       dst_release(dst);
 exit:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
-       dst_release(dst);
        return NULL;
 }
 
index 6e3f32575df78bbf75fc8e3cd46ea0cb52f4f771..dca711df9b60cea9e8ecf259cfdfc3909c60d3bb 100644 (file)
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
        newsk = dccp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
-               goto out;
+               goto out_nonewsk;
 
        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
+       if (__inet_inherit_port(sk, newsk) < 0) {
+               sock_put(newsk);
+               goto out;
+       }
        __inet6_hash(newsk, NULL);
-       __inet_inherit_port(sk, newsk);
 
        return newsk;
 
 out_overflow:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+out_nonewsk:
+       dst_release(dst);
 out:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
        if (opt != NULL && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
-       dst_release(dst);
        return NULL;
 }
 
index fb7ad5a21ff3e2e86b2d9018ea29ccd2e3921848..1b344f30b463fab9ed70a8f19a19d348d7c626f7 100644 (file)
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk)
 }
 EXPORT_SYMBOL(inet_put_port);
 
-void __inet_inherit_port(struct sock *sk, struct sock *child)
+int __inet_inherit_port(struct sock *sk, struct sock *child)
 {
        struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
-       const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num,
+       unsigned short port = inet_sk(child)->inet_num;
+       const int bhash = inet_bhashfn(sock_net(sk), port,
                        table->bhash_size);
        struct inet_bind_hashbucket *head = &table->bhash[bhash];
        struct inet_bind_bucket *tb;
 
        spin_lock(&head->lock);
        tb = inet_csk(sk)->icsk_bind_hash;
+       if (tb->port != port) {
+               /* NOTE: using tproxy and redirecting skbs to a proxy
+                * on a different listener port breaks the assumption
+                * that the listener socket's icsk_bind_hash is the same
+                * as that of the child socket. We have to look up or
+                * create a new bind bucket for the child here. */
+               struct hlist_node *node;
+               inet_bind_bucket_for_each(tb, node, &head->chain) {
+                       if (net_eq(ib_net(tb), sock_net(sk)) &&
+                           tb->port == port)
+                               break;
+               }
+               if (!node) {
+                       tb = inet_bind_bucket_create(table->bind_bucket_cachep,
+                                                    sock_net(sk), head, port);
+                       if (!tb) {
+                               spin_unlock(&head->lock);
+                               return -ENOMEM;
+                       }
+               }
+       }
        sk_add_bind_node(child, &tb->owners);
        inet_csk(child)->icsk_bind_hash = tb;
        spin_unlock(&head->lock);
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
index a0232f3a358b2b8fc6cd98b96a98f6b5cc69f1f6..8f8527d4168225be9429766df1d1e2c085057868 100644 (file)
@@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
-               goto exit;
+               goto exit_nonewsk;
 
        newsk->sk_gso_type = SKB_GSO_TCPV4;
        sk_setup_caps(newsk, dst);
@@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        }
 #endif
 
+       if (__inet_inherit_port(sk, newsk) < 0) {
+               sock_put(newsk);
+               goto exit;
+       }
        __inet_hash_nolisten(newsk, NULL);
-       __inet_inherit_port(sk, newsk);
 
        return newsk;
 
 exit_overflow:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+exit_nonewsk:
+       dst_release(dst);
 exit:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
-       dst_release(dst);
        return NULL;
 }
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
index 56b9bf2516f4d45b5b866e1e3086f54a05a7b530..4869797c1afa1486fb93c64d5e5b4ac7a70d37f4 100644 (file)
@@ -343,7 +343,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                         */
                        v4addr = LOOPBACK4_IPV6;
                        if (!(addr_type & IPV6_ADDR_MULTICAST)) {
-                               if (!ipv6_chk_addr(net, &addr->sin6_addr,
+                               if (!inet->transparent &&
+                                   !ipv6_chk_addr(net, &addr->sin6_addr,
                                                   dev, 0)) {
                                        err = -EADDRNOTAVAIL;
                                        goto out_unlock;
index ef371aa01ac50724f9dff9cbf7d084e062d844c9..320bdb877eed2ff61da25c6c9a1ac2f6cc00baac 100644 (file)
@@ -577,6 +577,25 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
                u8 *ptr = nh + opt->dst1;
                put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
        }
+       if (np->rxopt.bits.rxorigdstaddr) {
+               struct sockaddr_in6 sin6;
+               u16 *ports = (u16 *) skb_transport_header(skb);
+
+               if (skb_transport_offset(skb) + 4 <= skb->len) {
+                       /* All current transport protocols have the port numbers in the
+                        * first four bytes of the transport header and this function is
+                        * written with this assumption in mind.
+                        */
+
+                       sin6.sin6_family = AF_INET6;
+                       ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
+                       sin6.sin6_port = ports[1];
+                       sin6.sin6_flowinfo = 0;
+                       sin6.sin6_scope_id = 0;
+
+                       put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
+               }
+       }
        return 0;
 }
 
index a7f66bc8f0b0ef2a8b0eee454c1e103344eda417..0553867a317f4466b31df3bd6d2695e180537be9 100644 (file)
@@ -342,6 +342,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
                retv = 0;
                break;
 
+       case IPV6_TRANSPARENT:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               /* we don't have a separate transparent bit for IPv6; we use the one in the IPv4 socket */
+               inet_sk(sk)->transparent = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_RECVORIGDSTADDR:
+               if (optlen < sizeof(int))
+                       goto e_inval;
+               np->rxopt.bits.rxorigdstaddr = valbool;
+               retv = 0;
+               break;
+
        case IPV6_HOPOPTS:
        case IPV6_RTHDRDSTOPTS:
        case IPV6_RTHDR:
@@ -1104,6 +1119,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
                break;
        }
 
+       case IPV6_TRANSPARENT:
+               val = inet_sk(sk)->transparent;
+               break;
+
+       case IPV6_RECVORIGDSTADDR:
+               val = np->rxopt.bits.rxorigdstaddr;
+               break;
+
        case IPV6_UNICAST_HOPS:
        case IPV6_MULTICAST_HOPS:
        {
index aafbba30c899fac569184a5ef9943f13ecfdb3b9..3f8e4a3d83ce107bdf2280c35fac189f8a2a2838 100644 (file)
@@ -11,10 +11,11 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 
 # objects for l3 independent conntrack
-nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
+nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
+nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 
 # l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
 
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
index ff43461704be5c9433d7468f1fe99c63aab7d613..c8af58b225620795af240156ae1e5b735fa78a2d 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
-#include <linux/sysctl.h>
 #include <net/ipv6.h>
 #include <net/inet_frag.h>
 
@@ -29,6 +28,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/netfilter/nf_log.h>
 
 static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
@@ -189,53 +189,6 @@ out:
        return nf_conntrack_confirm(skb);
 }
 
-static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
-                                               struct sk_buff *skb)
-{
-       u16 zone = NF_CT_DEFAULT_ZONE;
-
-       if (skb->nfct)
-               zone = nf_ct_zone((struct nf_conn *)skb->nfct);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-       if (skb->nf_bridge &&
-           skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
-               return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
-       if (hooknum == NF_INET_PRE_ROUTING)
-               return IP6_DEFRAG_CONNTRACK_IN + zone;
-       else
-               return IP6_DEFRAG_CONNTRACK_OUT + zone;
-
-}
-
-static unsigned int ipv6_defrag(unsigned int hooknum,
-                               struct sk_buff *skb,
-                               const struct net_device *in,
-                               const struct net_device *out,
-                               int (*okfn)(struct sk_buff *))
-{
-       struct sk_buff *reasm;
-
-       /* Previously seen (loopback)?  */
-       if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
-               return NF_ACCEPT;
-
-       reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
-       /* queued */
-       if (reasm == NULL)
-               return NF_STOLEN;
-
-       /* error occured or not fragmented */
-       if (reasm == skb)
-               return NF_ACCEPT;
-
-       nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
-                          (struct net_device *)out, okfn);
-
-       return NF_STOLEN;
-}
-
 static unsigned int __ipv6_conntrack_in(struct net *net,
                                        unsigned int hooknum,
                                        struct sk_buff *skb,
@@ -287,13 +240,6 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
-       {
-               .hook           = ipv6_defrag,
-               .owner          = THIS_MODULE,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_PRE_ROUTING,
-               .priority       = NF_IP6_PRI_CONNTRACK_DEFRAG,
-       },
        {
                .hook           = ipv6_conntrack_in,
                .owner          = THIS_MODULE,
@@ -308,13 +254,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP6_PRI_CONNTRACK,
        },
-       {
-               .hook           = ipv6_defrag,
-               .owner          = THIS_MODULE,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_LOCAL_OUT,
-               .priority       = NF_IP6_PRI_CONNTRACK_DEFRAG,
-       },
        {
                .hook           = ipv6_confirm,
                .owner          = THIS_MODULE,
@@ -386,10 +325,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
        .nlattr_tuple_size      = ipv6_nlattr_tuple_size,
        .nlattr_to_tuple        = ipv6_nlattr_to_tuple,
        .nla_policy             = ipv6_nla_policy,
-#endif
-#ifdef CONFIG_SYSCTL
-       .ctl_table_path         = nf_net_netfilter_sysctl_path,
-       .ctl_table              = nf_ct_ipv6_sysctl_table,
 #endif
        .me                     = THIS_MODULE,
 };
@@ -403,16 +338,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
        int ret = 0;
 
        need_conntrack();
+       nf_defrag_ipv6_enable();
 
-       ret = nf_ct_frag6_init();
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv6: can't initialize frag6.\n");
-               return ret;
-       }
        ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
        if (ret < 0) {
                pr_err("nf_conntrack_ipv6: can't register tcp.\n");
-               goto cleanup_frag6;
+               return ret;
        }
 
        ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
@@ -450,8 +381,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
  cleanup_tcp:
        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
- cleanup_frag6:
-       nf_ct_frag6_cleanup();
        return ret;
 }
 
@@ -463,7 +392,6 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
-       nf_ct_frag6_cleanup();
 }
 
 module_init(nf_conntrack_l3proto_ipv6_init);
index 138a8b36270694ec96ca4a21f51a55b0c1d62e98..489d71b844ac9ba7c85d7f5612922e4ceeb712cc 100644 (file)
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
 static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_ipv6_sysctl_table[] = {
+struct ctl_table nf_ct_frag6_sysctl_table[] = {
        {
                .procname       = "nf_conntrack_frag6_timeout",
                .data           = &nf_init_frags.timeout,
@@ -97,6 +97,8 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
        },
        { }
 };
+
+static struct ctl_table_header *nf_ct_frag6_sysctl_header;
 #endif
 
 static unsigned int nf_hashfn(struct inet_frag_queue *q)
@@ -623,11 +625,21 @@ int nf_ct_frag6_init(void)
        inet_frags_init_net(&nf_init_frags);
        inet_frags_init(&nf_frags);
 
+       nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
+                                                         nf_ct_frag6_sysctl_table);
+       if (!nf_ct_frag6_sysctl_header) {
+               inet_frags_fini(&nf_frags);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
 void nf_ct_frag6_cleanup(void)
 {
+       unregister_sysctl_table(nf_ct_frag6_sysctl_header);
+       nf_ct_frag6_sysctl_header = NULL;
+
        inet_frags_fini(&nf_frags);
 
        nf_init_frags.low_thresh = 0;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
new file mode 100644 (file)
index 0000000..99abfb5
--- /dev/null
@@ -0,0 +1,131 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
+                                               struct sk_buff *skb)
+{
+       u16 zone = NF_CT_DEFAULT_ZONE;
+
+       if (skb->nfct)
+               zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+       if (skb->nf_bridge &&
+           skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+               return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+#endif
+       if (hooknum == NF_INET_PRE_ROUTING)
+               return IP6_DEFRAG_CONNTRACK_IN + zone;
+       else
+               return IP6_DEFRAG_CONNTRACK_OUT + zone;
+
+}
+
+static unsigned int ipv6_defrag(unsigned int hooknum,
+                               struct sk_buff *skb,
+                               const struct net_device *in,
+                               const struct net_device *out,
+                               int (*okfn)(struct sk_buff *))
+{
+       struct sk_buff *reasm;
+
+       /* Previously seen (loopback)?  */
+       if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
+               return NF_ACCEPT;
+
+       reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+       /* queued */
+       if (reasm == NULL)
+               return NF_STOLEN;
+
+       /* error occurred or not fragmented */
+       if (reasm == skb)
+               return NF_ACCEPT;
+
+       nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+                          (struct net_device *)out, okfn);
+
+       return NF_STOLEN;
+}
+
+static struct nf_hook_ops ipv6_defrag_ops[] = {
+       {
+               .hook           = ipv6_defrag,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_PRE_ROUTING,
+               .priority       = NF_IP6_PRI_CONNTRACK_DEFRAG,
+       },
+       {
+               .hook           = ipv6_defrag,
+               .owner          = THIS_MODULE,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP6_PRI_CONNTRACK_DEFRAG,
+       },
+};
+
+static int __init nf_defrag_init(void)
+{
+       int ret = 0;
+
+       ret = nf_ct_frag6_init();
+       if (ret < 0) {
+               pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
+               return ret;
+       }
+       ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+       if (ret < 0) {
+               pr_err("nf_defrag_ipv6: can't register hooks\n");
+               goto cleanup_frag6;
+       }
+       return ret;
+
+cleanup_frag6:
+       nf_ct_frag6_cleanup();
+       return ret;
+
+}
+
+static void __exit nf_defrag_fini(void)
+{
+       nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+       nf_ct_frag6_cleanup();
+}
+
+void nf_defrag_ipv6_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
+
+module_init(nf_defrag_init);
+module_exit(nf_defrag_fini);
+
+MODULE_LICENSE("GPL");
index fe6d40418c0b8fdba3b58fd7b88d3fedc2a7f6a2..ba5258ef1c57c76cdd92597880fde716924276d7 100644 (file)
@@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
        newsk = tcp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
-               goto out;
+               goto out_nonewsk;
 
        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        }
 #endif
 
+       if (__inet_inherit_port(sk, newsk) < 0) {
+               sock_put(newsk);
+               goto out;
+       }
        __inet6_hash(newsk, NULL);
-       __inet_inherit_port(sk, newsk);
 
        return newsk;
 
 out_overflow:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-out:
-       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+out_nonewsk:
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        dst_release(dst);
+out:
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
        return NULL;
 }
 
index 5acb3560ff15267021266f59c7b2102f633f3d6a..c84dad432114ef0d885b29244bf9df0854f750e2 100644 (file)
@@ -122,8 +122,8 @@ static void udp_v6_rehash(struct sock *sk)
 
 static inline int compute_score(struct sock *sk, struct net *net,
                                unsigned short hnum,
-                               struct in6_addr *saddr, __be16 sport,
-                               struct in6_addr *daddr, __be16 dport,
+                               const struct in6_addr *saddr, __be16 sport,
+                               const struct in6_addr *daddr, __be16 dport,
                                int dif)
 {
        int score = -1;
@@ -239,8 +239,8 @@ exact_match:
 }
 
 static struct sock *__udp6_lib_lookup(struct net *net,
-                                     struct in6_addr *saddr, __be16 sport,
-                                     struct in6_addr *daddr, __be16 dport,
+                                     const struct in6_addr *saddr, __be16 sport,
+                                     const struct in6_addr *daddr, __be16 dport,
                                      int dif, struct udp_table *udptable)
 {
        struct sock *sk, *result;
@@ -320,6 +320,14 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
                                 udptable);
 }
 
+/*
+ * Exported wrapper around __udp6_lib_lookup() that always searches the
+ * global udp_table.
+ */
+struct sock *udp6_lib_lookup(struct net *net,
+                            const struct in6_addr *saddr, __be16 sport,
+                            const struct in6_addr *daddr, __be16 dport,
+                            int dif)
+{
+       struct udp_table *table = &udp_table;
+
+       return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, table);
+}
+EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+
+
 /*
  *     This should be easy, if there is something there we
  *     return it, otherwise we block.
index 5490fc37c92dfa5363a2992fd67fd4f145f65360..db655638d76dbefb5c0017b3811c4ca43b0f9990 100644 (file)
 #include <net/udp.h>
 #include <net/netfilter/nf_tproxy_core.h>
 
-struct sock *
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
-                     const __be32 saddr, const __be32 daddr,
-                     const __be16 sport, const __be16 dport,
-                     const struct net_device *in, bool listening_only)
-{
-       struct sock *sk;
-
-       /* look up socket */
-       switch (protocol) {
-       case IPPROTO_TCP:
-               if (listening_only)
-                       sk = __inet_lookup_listener(net, &tcp_hashinfo,
-                                                   daddr, ntohs(dport),
-                                                   in->ifindex);
-               else
-                       sk = __inet_lookup(net, &tcp_hashinfo,
-                                          saddr, sport, daddr, dport,
-                                          in->ifindex);
-               break;
-       case IPPROTO_UDP:
-               sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
-                                    in->ifindex);
-               break;
-       default:
-               WARN_ON(1);
-               sk = NULL;
-       }
-
-       pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n",
-                protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk);
-
-       return sk;
-}
-EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
 
 static void
 nf_tproxy_destructor(struct sk_buff *skb)
index 21bb2aff6b8f49fdfc4430fcdb5ae64f9f7c8f56..19c482caf30b7f1587f41cd7b3c26a3d598bc084 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Transparent proxy support for Linux/iptables
  *
- * Copyright (c) 2006-2007 BalaBit IT Ltd.
+ * Copyright (c) 2006-2010 BalaBit IT Ltd.
  * Author: Balazs Scheidler, Krisztian Kovacs
  *
  * This program is free software; you can redistribute it and/or modify
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <net/inet_sock.h>
-
+#include <linux/inetdevice.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter/xt_TPROXY.h>
 
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <net/if_inet6.h>
+#include <net/addrconf.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
+
 #include <net/netfilter/nf_tproxy_core.h>
+#include <linux/netfilter/xt_TPROXY.h>
+
+/*
+ * tproxy_laddr4() - choose the local IPv4 address to redirect to
+ * @skb:        packet being processed; the addresses of skb->dev are used
+ * @user_laddr: address given on the rule, or zero if none was given
+ * @daddr:      fallback when the device offers no primary address
+ *
+ * Returns @user_laddr when it is non-zero, otherwise the first primary
+ * address of skb->dev, otherwise @daddr.
+ */
+static inline __be32
+tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
+{
+       struct in_device *indev;
+       __be32 laddr;
+
+       if (user_laddr)
+               return user_laddr;
+
+       laddr = 0;
+       rcu_read_lock();
+       indev = __in_dev_get_rcu(skb->dev);
+       /* __in_dev_get_rcu() yields NULL for a device without IPv4 state and
+        * for_primary_ifa() dereferences its argument, so guard the walk */
+       if (indev) {
+               for_primary_ifa(indev) {
+                       laddr = ifa->ifa_local;
+                       break;
+               } endfor_ifa(indev);
+       }
+       rcu_read_unlock();
+
+       return laddr ? laddr : daddr;
+}
+
+/**
+ * tproxy_handle_time_wait4() - redirect a SYN that hit an IPv4 TIME_WAIT socket
+ * @skb:       Packet under inspection.
+ * @laddr:     Redirect address; zero selects the packet's own destination.
+ * @lport:     Redirect port; zero selects the packet's own destination port.
+ * @sk:                TIME_WAIT socket matched by the established lookup.
+ *
+ * A bare SYN (no RST, ACK or FIN) arriving on a TIME_WAIT socket is
+ * handed to a listener on the redirect address/port when one exists,
+ * rather than being left to reopen the old connection.
+ *
+ * The reference held on @sk is released whenever something other than
+ * @sk itself is returned.
+ *
+ * Returns the listener if one was found, NULL if the TCP header could
+ * not be read, and @sk in every other case.
+ */
+static struct sock *
+tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
+                        struct sock *sk)
+{
+       const struct iphdr *iph = ip_hdr(skb);
+       struct tcphdr tcph_buf, *tcph;
+       struct sock *listener;
+
+       tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(tcph_buf), &tcph_buf);
+       if (!tcph) {
+               inet_twsk_put(inet_twsk(sk));
+               return NULL;
+       }
+
+       /* anything but a bare SYN stays with the TIME_WAIT socket */
+       if (!tcph->syn || tcph->rst || tcph->ack || tcph->fin)
+               return sk;
+
+       if (!laddr)
+               laddr = iph->daddr;
+       if (!lport)
+               lport = tcph->dest;
+
+       listener = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+                                        iph->saddr, laddr,
+                                        tcph->source, lport,
+                                        skb->dev, NFT_LOOKUP_LISTENER);
+       if (!listener)
+               return sk;
+
+       /* hand over to the listener and retire the TIME_WAIT socket */
+       inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+       inet_twsk_put(inet_twsk(sk));
+       return listener;
+}
 
 static unsigned int
-tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
+tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
+          u_int32_t mark_mask, u_int32_t mark_value)
 {
        const struct iphdr *iph = ip_hdr(skb);
-       const struct xt_tproxy_target_info *tgi = par->targinfo;
        struct udphdr _hdr, *hp;
        struct sock *sk;
 
@@ -36,12 +113,195 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
        if (hp == NULL)
                return NF_DROP;
 
+       /* check if there's an ongoing connection on the packet
+        * addresses, this happens if the redirect already happened
+        * and the current packet belongs to an already established
+        * connection */
        sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
-                                  iph->saddr,
-                                  tgi->laddr ? tgi->laddr : iph->daddr,
-                                  hp->source,
-                                  tgi->lport ? tgi->lport : hp->dest,
-                                  par->in, true);
+                                  iph->saddr, iph->daddr,
+                                  hp->source, hp->dest,
+                                  skb->dev, NFT_LOOKUP_ESTABLISHED);
+
+       laddr = tproxy_laddr4(skb, laddr, iph->daddr);
+       if (!lport)
+               lport = hp->dest;
+
+       /* UDP has no TCP_TIME_WAIT state, so we never enter here */
+       if (sk && sk->sk_state == TCP_TIME_WAIT)
+               /* reopening a TIME_WAIT connection needs special handling */
+               sk = tproxy_handle_time_wait4(skb, laddr, lport, sk);
+       else if (!sk)
+               /* no, there's no established connection, check if
+                * there's a listener on the redirected addr/port */
+               sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+                                          iph->saddr, laddr,
+                                          hp->source, lport,
+                                          skb->dev, NFT_LOOKUP_LISTENER);
+
+       /* NOTE: assign_sock consumes our sk reference */
+       if (sk && nf_tproxy_assign_sock(skb, sk)) {
+               /* This should be in a separate target, but we don't do multiple
+                  targets on the same rule yet */
+               skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
+
+               pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
+                        iph->protocol, &iph->daddr, ntohs(hp->dest),
+                        &laddr, ntohs(lport), skb->mark);
+               return NF_ACCEPT;
+       }
+
+       pr_debug("no socket, dropping: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
+                iph->protocol, &iph->saddr, ntohs(hp->source),
+                &iph->daddr, ntohs(hp->dest), skb->mark);
+       return NF_DROP;
+}
+
+/* Revision 0 target: unpack xt_tproxy_target_info and defer to tproxy_tg4(). */
+static unsigned int
+tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct xt_tproxy_target_info *info = par->targinfo;
+       const __be32 laddr = info->laddr;
+       const __be16 lport = info->lport;
+
+       return tproxy_tg4(skb, laddr, lport,
+                         info->mark_mask, info->mark_value);
+}
+
+/*
+ * Revision 1 target for IPv4: unpack xt_tproxy_target_info_v1, taking the
+ * IPv4 member of its address union, and defer to tproxy_tg4().
+ */
+static unsigned int
+tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct xt_tproxy_target_info_v1 *info = par->targinfo;
+       const __be32 laddr = info->laddr.ip;
+       const __be16 lport = info->lport;
+
+       return tproxy_tg4(skb, laddr, lport,
+                         info->mark_mask, info->mark_value);
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+/*
+ * tproxy_laddr6() - choose the local IPv6 address to redirect to
+ * @skb:        packet being processed; the addresses of skb->dev are used
+ * @user_laddr: address given on the rule; the unspecified address means
+ *              "none given"
+ * @daddr:      fallback when the device offers no usable address
+ *
+ * Returns @user_laddr unless it is the unspecified address, otherwise the
+ * first address of skb->dev that is neither tentative nor deprecated,
+ * otherwise @daddr.
+ */
+static inline const struct in6_addr *
+tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
+             const struct in6_addr *daddr)
+{
+       struct inet6_dev *indev;
+       struct inet6_ifaddr *ifa;
+       struct in6_addr *laddr;
+
+       if (!ipv6_addr_any(user_laddr))
+               return user_laddr;
+       laddr = NULL;
+
+       rcu_read_lock();
+       indev = __in6_dev_get(skb->dev);
+       if (indev) {
+               /* the per-device address list is protected by indev->lock,
+                * so hold it for the duration of the walk */
+               read_lock_bh(&indev->lock);
+               list_for_each_entry(ifa, &indev->addr_list, if_list) {
+                       if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
+                               continue;
+
+                       laddr = &ifa->addr;
+                       break;
+               }
+               read_unlock_bh(&indev->lock);
+       }
+       rcu_read_unlock();
+
+       return laddr ? laddr : daddr;
+}
+
+/**
+ * tproxy_handle_time_wait6() - redirect a SYN that hit an IPv6 TIME_WAIT socket
+ * @skb:       Packet under inspection.
+ * @tproto:    Transport protocol number used for the listener lookup.
+ * @thoff:     Offset of the transport header within @skb.
+ * @par:       Target parameters; targinfo supplies redirect address and port.
+ * @sk:                TIME_WAIT socket matched by the established lookup.
+ *
+ * A bare SYN (no RST, ACK or FIN) arriving on a TIME_WAIT socket is
+ * handed to a listener on the redirect address/port when one exists,
+ * rather than being left to reopen the old connection.
+ *
+ * The reference held on @sk is released whenever something other than
+ * @sk itself is returned.
+ *
+ * Returns the listener if one was found, NULL if the TCP header could
+ * not be read, and @sk in every other case.
+ */
+static struct sock *
+tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
+                        const struct xt_action_param *par,
+                        struct sock *sk)
+{
+       const struct xt_tproxy_target_info_v1 *info = par->targinfo;
+       const struct ipv6hdr *iph = ipv6_hdr(skb);
+       struct tcphdr tcph_buf, *tcph;
+       const struct in6_addr *laddr;
+       struct sock *listener;
+       __be16 lport;
+
+       tcph = skb_header_pointer(skb, thoff, sizeof(tcph_buf), &tcph_buf);
+       if (!tcph) {
+               inet_twsk_put(inet_twsk(sk));
+               return NULL;
+       }
+
+       /* anything but a bare SYN stays with the TIME_WAIT socket */
+       if (!tcph->syn || tcph->rst || tcph->ack || tcph->fin)
+               return sk;
+
+       laddr = tproxy_laddr6(skb, &info->laddr.in6, &iph->daddr);
+       lport = info->lport ? info->lport : tcph->dest;
+
+       listener = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+                                        &iph->saddr, laddr,
+                                        tcph->source, lport,
+                                        skb->dev, NFT_LOOKUP_LISTENER);
+       if (!listener)
+               return sk;
+
+       /* hand over to the listener and retire the TIME_WAIT socket */
+       inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+       inet_twsk_put(inet_twsk(sk));
+       return listener;
+}
+
+static unsigned int
+tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct ipv6hdr *iph = ipv6_hdr(skb);
+       const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
+       struct udphdr _hdr, *hp;
+       struct sock *sk;
+       const struct in6_addr *laddr;
+       __be16 lport;
+       int thoff;
+       int tproto;
+
+       tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
+       if (tproto < 0) {
+               pr_debug("unable to find transport header in IPv6 packet, dropping\n");
+               return NF_DROP;
+       }
+
+       hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+       if (hp == NULL) {
+               pr_debug("unable to grab transport header contents in IPv6 packet, dropping\n");
+               return NF_DROP;
+       }
+
+       /* check if there's an ongoing connection on the packet
+        * addresses, this happens if the redirect already happened
+        * and the current packet belongs to an already established
+        * connection */
+       sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+                                  &iph->saddr, &iph->daddr,
+                                  hp->source, hp->dest,
+                                  par->in, NFT_LOOKUP_ESTABLISHED);
+
+       laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
+       lport = tgi->lport ? tgi->lport : hp->dest;
+
+       /* UDP has no TCP_TIME_WAIT state, so we never enter here */
+       if (sk && sk->sk_state == TCP_TIME_WAIT)
+               /* reopening a TIME_WAIT connection needs special handling */
+               sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk);
+       else if (!sk)
+               /* no, there's no established connection, check if
+                * there's a listener on the redirected addr/port */
+               sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+                                          &iph->saddr, laddr,
+                                          hp->source, lport,
+                                          par->in, NFT_LOOKUP_LISTENER);
 
        /* NOTE: assign_sock consumes our sk reference */
        if (sk && nf_tproxy_assign_sock(skb, sk)) {
@@ -49,19 +309,34 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
                   targets on the same rule yet */
                skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
 
-               pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n",
-                        iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
-                        ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+               pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
+                        tproto, &iph->saddr, ntohs(hp->source),
+                        laddr, ntohs(lport), skb->mark);
                return NF_ACCEPT;
        }
 
-       pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n",
-                iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
-                ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+       pr_debug("no socket, dropping: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
+                tproto, &iph->saddr, ntohs(hp->source),
+                &iph->daddr, ntohs(hp->dest), skb->mark);
+
        return NF_DROP;
 }
 
-static int tproxy_tg_check(const struct xt_tgchk_param *par)
+/*
+ * Rule validation for the IPv6 TPROXY target: accept the rule only when
+ * it matches exactly TCP or UDP, i.e. the protocol is one of the two and
+ * the protocol match is not inverted.
+ *
+ * Returns 0 if the rule is acceptable, -EINVAL otherwise.
+ */
+static int tproxy_tg6_check(const struct xt_tgchk_param *par)
+{
+       const struct ip6t_ip6 *i = par->entryinfo;
+
+       /* IP6T_INV_PROTO is an inversion bit: it lives in invflags, not in
+        * flags, so testing flags would let "! -p tcp" rules through */
+       if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
+           && !(i->invflags & IP6T_INV_PROTO))
+               return 0;
+
+       pr_info("Can be used only in combination with "
+               "either -p tcp or -p udp\n");
+       return -EINVAL;
+}
+#endif
+
+static int tproxy_tg4_check(const struct xt_tgchk_param *par)
 {
        const struct ipt_ip *i = par->entryinfo;
 
@@ -74,31 +349,64 @@ static int tproxy_tg_check(const struct xt_tgchk_param *par)
        return -EINVAL;
 }
 
-static struct xt_target tproxy_tg_reg __read_mostly = {
-       .name           = "TPROXY",
-       .family         = NFPROTO_IPV4,
-       .table          = "mangle",
-       .target         = tproxy_tg,
-       .targetsize     = sizeof(struct xt_tproxy_target_info),
-       .checkentry     = tproxy_tg_check,
-       .hooks          = 1 << NF_INET_PRE_ROUTING,
-       .me             = THIS_MODULE,
+/*
+ * TPROXY target registrations, all restricted to the "mangle" table at
+ * PRE_ROUTING: IPv4 revisions 0 and 1, plus IPv6 revision 1 when IPv6
+ * support is configured.
+ */
+static struct xt_target tproxy_tg_reg[] __read_mostly = {
+       {
+               .name           = "TPROXY",
+               .revision       = 0,
+               .family         = NFPROTO_IPV4,
+               .table          = "mangle",
+               .hooks          = 1 << NF_INET_PRE_ROUTING,
+               .target         = tproxy_tg4_v0,
+               .targetsize     = sizeof(struct xt_tproxy_target_info),
+               .checkentry     = tproxy_tg4_check,
+               .me             = THIS_MODULE,
+       },
+       {
+               .name           = "TPROXY",
+               .revision       = 1,
+               .family         = NFPROTO_IPV4,
+               .table          = "mangle",
+               .hooks          = 1 << NF_INET_PRE_ROUTING,
+               .target         = tproxy_tg4_v1,
+               .targetsize     = sizeof(struct xt_tproxy_target_info_v1),
+               .checkentry     = tproxy_tg4_check,
+               .me             = THIS_MODULE,
+       },
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       {
+               .name           = "TPROXY",
+               .revision       = 1,
+               .family         = NFPROTO_IPV6,
+               .table          = "mangle",
+               .hooks          = 1 << NF_INET_PRE_ROUTING,
+               .target         = tproxy_tg6_v1,
+               .targetsize     = sizeof(struct xt_tproxy_target_info_v1),
+               .checkentry     = tproxy_tg6_check,
+               .me             = THIS_MODULE,
+       },
+#endif
+};
 
 static int __init tproxy_tg_init(void)
 {
        nf_defrag_ipv4_enable();
-       return xt_register_target(&tproxy_tg_reg);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       nf_defrag_ipv6_enable();
+#endif
+
+       return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
 }
 
 static void __exit tproxy_tg_exit(void)
 {
-       xt_unregister_target(&tproxy_tg_reg);
+       xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
 }
 
 module_init(tproxy_tg_init);
 module_exit(tproxy_tg_exit);
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Krisztian Kovacs");
+MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
 MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
 MODULE_ALIAS("ipt_TPROXY");
+MODULE_ALIAS("ip6t_TPROXY");
index 1ca89908cbad84d2dc45093aa474cca056f239e3..2dbd4c857735abdddde389dd5154bbba0e5f2aef 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <net/icmp.h>
@@ -21,6 +22,7 @@
 #include <net/inet_sock.h>
 #include <net/netfilter/nf_tproxy_core.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 #include <linux/netfilter/xt_socket.h>
 
@@ -30,7 +32,7 @@
 #endif
 
 static int
-extract_icmp_fields(const struct sk_buff *skb,
+extract_icmp4_fields(const struct sk_buff *skb,
                    u8 *protocol,
                    __be32 *raddr,
                    __be32 *laddr,
@@ -86,7 +88,6 @@ extract_icmp_fields(const struct sk_buff *skb,
        return 0;
 }
 
-
 static bool
 socket_match(const struct sk_buff *skb, struct xt_action_param *par,
             const struct xt_socket_mtinfo1 *info)
@@ -115,7 +116,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
                dport = hp->dest;
 
        } else if (iph->protocol == IPPROTO_ICMP) {
-               if (extract_icmp_fields(skb, &protocol, &saddr, &daddr,
+               if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
                                        &sport, &dport))
                        return false;
        } else {
@@ -142,7 +143,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 #endif
 
        sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
-                                  saddr, daddr, sport, dport, par->in, false);
+                                  saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
        if (sk != NULL) {
                bool wildcard;
                bool transparent = true;
@@ -165,32 +166,157 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
                        sk = NULL;
        }
 
-       pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n",
-                protocol, ntohl(saddr), ntohs(sport),
-                ntohl(daddr), ntohs(dport),
-                ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk);
+       pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
+                protocol, &saddr, ntohs(sport),
+                &daddr, ntohs(dport),
+                &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
 
        return (sk != NULL);
 }
 
 static bool
-socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
        return socket_match(skb, par, NULL);
 }
 
 static bool
-socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
        return socket_match(skb, par, par->matchinfo);
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+/*
+ * extract_icmp6_fields() - pull the quoted flow out of an ICMPv6 error
+ * @skb:            packet carrying the ICMPv6 message
+ * @outside_hdrlen: offset of the ICMPv6 header within @skb
+ * @protocol:       out: transport protocol of the quoted packet
+ * @raddr:          out: remote address (destination of the quoted packet)
+ * @laddr:          out: local address (source of the quoted packet)
+ * @rport:          out: remote port
+ * @lport:          out: local port
+ * @ipv6_var:       caller-owned scratch space for the quoted IPv6 header
+ *
+ * skb_header_pointer() may copy the quoted header into the buffer it is
+ * given, and @raddr/@laddr are set to point into that header.  The buffer
+ * must therefore outlive this function, which is why the caller supplies
+ * it instead of it being a local variable here.
+ *
+ * Returns 0 on success and 1 when the message is informational, is
+ * truncated, or does not quote a TCP or UDP packet.
+ */
+static int
+extract_icmp6_fields(const struct sk_buff *skb,
+                    unsigned int outside_hdrlen,
+                    int *protocol,
+                    struct in6_addr **raddr,
+                    struct in6_addr **laddr,
+                    __be16 *rport,
+                    __be16 *lport,
+                    struct ipv6hdr *ipv6_var)
+{
+       struct ipv6hdr *inside_iph;
+       struct icmp6hdr *icmph, _icmph;
+       __be16 *ports, _ports[2];
+       u8 inside_nexthdr;
+       int inside_hdrlen;
+
+       icmph = skb_header_pointer(skb, outside_hdrlen,
+                                  sizeof(_icmph), &_icmph);
+       if (icmph == NULL)
+               return 1;
+
+       /* only error messages quote the offending packet */
+       if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
+               return 1;
+
+       inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
+                                       sizeof(*ipv6_var), ipv6_var);
+       if (inside_iph == NULL)
+               return 1;
+       inside_nexthdr = inside_iph->nexthdr;
+
+       inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
+                                             sizeof(*ipv6_var),
+                                        &inside_nexthdr);
+       if (inside_hdrlen < 0)
+               return 1; /* hjm: Packet has no/incomplete transport layer headers. */
+
+       if (inside_nexthdr != IPPROTO_TCP &&
+           inside_nexthdr != IPPROTO_UDP)
+               return 1;
+
+       ports = skb_header_pointer(skb, inside_hdrlen,
+                                  sizeof(_ports), &_ports);
+       if (ports == NULL)
+               return 1;
+
+       /* the inside IP packet is the one quoted from our side, thus
+        * its saddr is the local address */
+       *protocol = inside_nexthdr;
+       *laddr = &inside_iph->saddr;
+       *lport = ports[0];
+       *raddr = &inside_iph->daddr;
+       *rport = ports[1];
+
+       return 0;
+}
+
+/*
+ * socket_mt6_v1() - IPv6 "socket" match, revision 1
+ * @skb: packet being matched
+ * @par: match parameters; matchinfo is a struct xt_socket_mtinfo1
+ *
+ * Looks up a socket for the packet's flow (or, for ICMPv6 errors, for the
+ * flow of the quoted packet).  Returns true when such a socket exists, is
+ * not bound to the unspecified address and, if XT_SOCKET_TRANSPARENT was
+ * requested, is marked transparent.
+ */
+static bool
+socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       /* ipv6_var backs saddr/daddr when an ICMPv6-quoted header had to
+        * be copied out of the skb, so it must live in this frame */
+       struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb);
+       struct udphdr _hdr, *hp = NULL;
+       struct sock *sk;
+       struct in6_addr *daddr, *saddr;
+       __be16 dport, sport;
+       int thoff = 0;
+       /* signed on purpose: ipv6_find_hdr() reports failure as a negative
+        * value, which an u8 could never hold */
+       int tproto;
+       const struct xt_socket_mtinfo1 *info = par->matchinfo;
+
+       tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
+       if (tproto < 0) {
+               pr_debug("unable to find transport header in IPv6 packet, dropping\n");
+               return false;
+       }
+
+       if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
+               hp = skb_header_pointer(skb, thoff,
+                                       sizeof(_hdr), &_hdr);
+               if (hp == NULL)
+                       return false;
+
+               saddr = &iph->saddr;
+               sport = hp->source;
+               daddr = &iph->daddr;
+               dport = hp->dest;
+
+       } else if (tproto == IPPROTO_ICMPV6) {
+               if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
+                                        &sport, &dport, &ipv6_var))
+                       return false;
+       } else {
+               return false;
+       }
+
+       sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+                                  saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
+       if (sk != NULL) {
+               bool wildcard;
+               bool transparent = true;
+
+               /* Ignore sockets bound to the unspecified address (::) */
+               wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+                           ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
+
+               /* Ignore non-transparent sockets,
+                  if XT_SOCKET_TRANSPARENT is used */
+               if (info && info->flags & XT_SOCKET_TRANSPARENT)
+                       transparent = ((sk->sk_state != TCP_TIME_WAIT &&
+                                       inet_sk(sk)->transparent) ||
+                                      (sk->sk_state == TCP_TIME_WAIT &&
+                                       inet_twsk(sk)->tw_transparent));
+
+               /* only the verdict is needed; sk is used below purely as a
+                * flag and for the debug print, never dereferenced */
+               nf_tproxy_put_sock(sk);
+
+               if (wildcard || !transparent)
+                       sk = NULL;
+       }
+
+       pr_debug("proto %hhu %pI6:%hu -> %pI6:%hu "
+                "(orig %pI6:%hu) sock %p\n",
+                tproto, saddr, ntohs(sport),
+                daddr, ntohs(dport),
+                &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
+
+       return (sk != NULL);
+}
+#endif
+
 static struct xt_match socket_mt_reg[] __read_mostly = {
        {
                .name           = "socket",
                .revision       = 0,
                .family         = NFPROTO_IPV4,
-               .match          = socket_mt_v0,
+               .match          = socket_mt4_v0,
                .hooks          = (1 << NF_INET_PRE_ROUTING) |
                                  (1 << NF_INET_LOCAL_IN),
                .me             = THIS_MODULE,
@@ -199,17 +325,33 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
                .name           = "socket",
                .revision       = 1,
                .family         = NFPROTO_IPV4,
-               .match          = socket_mt_v1,
+               .match          = socket_mt4_v1,
                .matchsize      = sizeof(struct xt_socket_mtinfo1),
                .hooks          = (1 << NF_INET_PRE_ROUTING) |
                                  (1 << NF_INET_LOCAL_IN),
                .me             = THIS_MODULE,
        },
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       {
+               .name           = "socket",
+               .revision       = 1,
+               .family         = NFPROTO_IPV6,
+               .match          = socket_mt6_v1,
+               .matchsize      = sizeof(struct xt_socket_mtinfo1),
+               .hooks          = (1 << NF_INET_PRE_ROUTING) |
+                                 (1 << NF_INET_LOCAL_IN),
+               .me             = THIS_MODULE,
+       },
+#endif
 };
 
 static int __init socket_mt_init(void)
 {
        nf_defrag_ipv4_enable();
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       nf_defrag_ipv6_enable();
+#endif
+
        return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
 }
 
@@ -225,3 +367,4 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
 MODULE_DESCRIPTION("x_tables socket match module");
 MODULE_ALIAS("ipt_socket");
+MODULE_ALIAS("ip6t_socket");