bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Merge branch 'master' of /repos/git/net-next-2.6
authorPatrick McHardy <kaber@trash.net>
Tue, 15 Jun 2010 15:31:06 +0000 (17:31 +0200)
committerPatrick McHardy <kaber@trash.net>
Tue, 15 Jun 2010 15:31:06 +0000 (17:31 +0200)
Conflicts:
include/net/netfilter/xt_rateest.h
net/bridge/br_netfilter.c
net/netfilter/nf_conntrack_core.c

Signed-off-by: Patrick McHardy <kaber@trash.net>
36 files changed:
include/linux/netfilter/Kbuild
include/linux/netfilter/nf_conntrack_common.h
include/linux/netfilter/nfnetlink_log.h
include/linux/netfilter/xt_IDLETIMER.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_core.h
include/net/netfilter/xt_rateest.h
net/bridge/br_netfilter.c
net/ipv4/netfilter.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_queue.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/nf_nat_core.c
net/ipv4/netfilter/nf_nat_standalone.c
net/ipv6/netfilter.c
net/ipv6/netfilter/ip6_queue.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/xt_CT.c
net/netfilter/xt_IDLETIMER.c [new file with mode: 0644]
net/netfilter/xt_NOTRACK.c
net/netfilter/xt_TEE.c
net/netfilter/xt_cluster.c
net/netfilter/xt_conntrack.c
net/netfilter/xt_sctp.c
net/netfilter/xt_socket.c
net/netfilter/xt_state.c
net/netfilter/xt_statistic.c

index 48767cd164537760d6e8bd99db88d971afbef88d..bb103f43afa00463a1e5c99644144f0f7f0bc243 100644 (file)
@@ -8,6 +8,7 @@ header-y += xt_CONNMARK.h
 header-y += xt_CONNSECMARK.h
 header-y += xt_CT.h
 header-y += xt_DSCP.h
+header-y += xt_IDLETIMER.h
 header-y += xt_LED.h
 header-y += xt_MARK.h
 header-y += xt_NFLOG.h
index 14e6d32002c43d5a104bd45878c7671f9cbc20bc..1afd18c855ec99d9cf8cb4b9570b06b6e28b74d2 100644 (file)
@@ -76,6 +76,10 @@ enum ip_conntrack_status {
        /* Conntrack is a template */
        IPS_TEMPLATE_BIT = 11,
        IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT),
+
+       /* Conntrack is a fake untracked entry */
+       IPS_UNTRACKED_BIT = 12,
+       IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
 };
 
 /* Connection tracking event types */
index d3bab7a2c9b710eeecaecd24f660697b14263196..1d0b84aa1d4294624213b771b5f87f7d5962cb1d 100644 (file)
@@ -89,6 +89,7 @@ enum nfulnl_attr_config {
 #define NFULNL_COPY_NONE       0x00
 #define NFULNL_COPY_META       0x01
 #define NFULNL_COPY_PACKET     0x02
+#define NFULNL_COPY_DISABLED   0x03
 
 #define NFULNL_CFG_F_SEQ       0x0001
 #define NFULNL_CFG_F_SEQ_GLOBAL        0x0002
diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h
new file mode 100644 (file)
index 0000000..3e1aa1b
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * linux/include/linux/netfilter/xt_IDLETIMER.h
+ *
+ * Header file for Xtables timer target module.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and forward-ported to 2.6.34
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _XT_IDLETIMER_H
+#define _XT_IDLETIMER_H
+
+#include <linux/types.h>
+
+#define MAX_IDLETIMER_LABEL_SIZE 28
+
+struct idletimer_tg_info {
+       __u32 timeout;
+
+       char label[MAX_IDLETIMER_LABEL_SIZE];
+
+       /* for kernel module internal use only */
+       struct idletimer_tg *timer __attribute((aligned(8)));
+};
+
+#endif
index bbfdd9453087c8fbdcc23d6a92e989b64f9d532b..e624dae54fa49b7d713b78641c7f18c2e3cfbae2 100644 (file)
@@ -257,7 +257,12 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
                               u32 seq);
 
 /* Fake conntrack entry for untracked connections */
-extern struct nf_conn nf_conntrack_untracked;
+DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+static inline struct nf_conn *nf_ct_untracked_get(void)
+{
+       return &__raw_get_cpu_var(nf_conntrack_untracked);
+}
+extern void nf_ct_untracked_status_or(unsigned long bits);
 
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 extern void
@@ -285,9 +290,9 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
        return test_bit(IPS_DYING_BIT, &ct->status);
 }
 
-static inline int nf_ct_is_untracked(const struct sk_buff *skb)
+static inline int nf_ct_is_untracked(const struct nf_conn *ct)
 {
-       return (skb->nfct == &nf_conntrack_untracked.ct_general);
+       return test_bit(IPS_UNTRACKED_BIT, &ct->status);
 }
 
 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
index 3d7524fba1946d2993033f44b8330f210eaa4bb4..aced085132e7e52591f05ba1f1529dbba420465b 100644 (file)
@@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
        struct nf_conn *ct = (struct nf_conn *)skb->nfct;
        int ret = NF_ACCEPT;
 
-       if (ct && ct != &nf_conntrack_untracked) {
+       if (ct && !nf_ct_is_untracked(ct)) {
                if (!nf_ct_is_confirmed(ct))
                        ret = __nf_conntrack_confirm(skb);
                if (likely(ret == NF_ACCEPT))
index 5e1427795928541d3eb37bd3c46e87b4b90c434d..5a2978d1cb22ae8713d43688b74370e44727725e 100644 (file)
@@ -2,13 +2,17 @@
 #define _XT_RATEEST_H
 
 struct xt_rateest {
+       /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
+       struct gnet_stats_basic_packed  bstats;
+       spinlock_t                      lock;
+       /* keep rstats and lock on same cache line to speedup xt_rateest_mt() */
+       struct gnet_stats_rate_est      rstats;
+
+       /* following fields not accessed in hot path */
        struct hlist_node               list;
        char                            name[IFNAMSIZ];
        unsigned int                    refcnt;
-       spinlock_t                      lock;
        struct gnet_estimator           params;
-       struct gnet_stats_rate_est      rstats;
-       struct gnet_stats_basic_packed  bstats;
        struct rcu_head                 rcu;
 };
 
index 0685b2558ab507bcc976d2bbaa41169f23f28dc8..6bb6f7c9e6e1afe4b00fdfadc1caef68b01d1c7e 100644 (file)
@@ -244,8 +244,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
                kfree_skb(skb);
                return 0;
        }
-       dst_hold(&rt->dst);
-       skb_dst_set(skb, &rt->dst);
+       skb_dst_set_noref(skb, &rt->dst);
 
        skb->dev = nf_bridge->physindev;
        nf_bridge_update_protocol(skb);
@@ -396,8 +395,7 @@ bridged_dnat:
                        kfree_skb(skb);
                        return 0;
                }
-               dst_hold(&rt->dst);
-               skb_dst_set(skb, &rt->dst);
+               skb_dst_set_noref(skb, &rt->dst);
        }
 
        skb->dev = nf_bridge->physindev;
index cfbc79af21c3093ee3ba87e94963500564fdec85..d88a46c54fd1bbb62a6f9ed13570c518940666b5 100644 (file)
@@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
                skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
                                               skb->len - dataoff, 0);
                skb->ip_summed = CHECKSUM_NONE;
-               csum = __skb_checksum_complete_head(skb, dataoff + len);
-               if (!csum)
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+               return __skb_checksum_complete_head(skb, dataoff + len);
        }
        return csum;
 }
index 1ac01b1286219475cde625bb921887fc3645d7a3..16c0ba0a272840c499d6e45b482133d1634d22b3 100644 (file)
@@ -758,7 +758,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
         * about).
         */
        countersize = sizeof(struct xt_counters) * private->number;
-       counters = vmalloc_node(countersize, numa_node_id());
+       counters = vmalloc(countersize);
 
        if (counters == NULL)
                return ERR_PTR(-ENOMEM);
@@ -1005,8 +1005,7 @@ static int __do_replace(struct net *net, const char *name,
        struct arpt_entry *iter;
 
        ret = 0;
-       counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
-                               numa_node_id());
+       counters = vmalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
@@ -1159,7 +1158,7 @@ static int do_add_counters(struct net *net, const void __user *user,
        if (len != size + num_counters * sizeof(struct xt_counters))
                return -EINVAL;
 
-       paddc = vmalloc_node(len - size, numa_node_id());
+       paddc = vmalloc(len - size);
        if (!paddc)
                return -ENOMEM;
 
index a4e5fc5df4bfd31aef2ed606fe475e7cfc15559d..d2c1311cb28d6aee7b218351580210b4e5783a3e 100644 (file)
@@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_RWLOCK(queue_lock);
+static DEFINE_SPINLOCK(queue_lock);
 static int peer_pid __read_mostly;
 static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
@@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
                break;
 
        case IPQ_COPY_PACKET:
-               copy_mode = mode;
+               if (range > 0xFFFF)
+                       range = 0xFFFF;
                copy_range = range;
-               if (copy_range > 0xFFFF)
-                       copy_range = 0xFFFF;
+               copy_mode = mode;
                break;
 
        default:
@@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id)
 {
        struct nf_queue_entry *entry = NULL, *i;
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        list_for_each_entry(i, &queue_list, list) {
                if ((unsigned long)i == id) {
@@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id)
                queue_total--;
        }
 
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return entry;
 }
 
@@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 static void
 ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
        __ipq_flush(cmpfn, data);
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
 }
 
 static struct sk_buff *
@@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
        struct nlmsghdr *nlh;
        struct timeval tv;
 
-       read_lock_bh(&queue_lock);
-
-       switch (copy_mode) {
+       switch (ACCESS_ONCE(copy_mode)) {
        case IPQ_COPY_META:
        case IPQ_COPY_NONE:
                size = NLMSG_SPACE(sizeof(*pmsg));
@@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 
        case IPQ_COPY_PACKET:
                if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
-                   (*errp = skb_checksum_help(entry->skb))) {
-                       read_unlock_bh(&queue_lock);
+                   (*errp = skb_checksum_help(entry->skb)))
                        return NULL;
-               }
-               if (copy_range == 0 || copy_range > entry->skb->len)
+
+               data_len = ACCESS_ONCE(copy_range);
+               if (data_len == 0 || data_len > entry->skb->len)
                        data_len = entry->skb->len;
-               else
-                       data_len = copy_range;
 
                size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
                break;
 
        default:
                *errp = -EINVAL;
-               read_unlock_bh(&queue_lock);
                return NULL;
        }
 
-       read_unlock_bh(&queue_lock);
-
        skb = alloc_skb(size, GFP_ATOMIC);
        if (!skb)
                goto nlmsg_failure;
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
        if (nskb == NULL)
                return status;
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        if (!peer_pid)
                goto err_out_free_nskb;
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
        __ipq_enqueue_entry(entry);
 
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return status;
 
 err_out_free_nskb:
        kfree_skb(nskb);
 
 err_out_unlock:
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return status;
 }
 
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
 {
        int status;
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
        status = __ipq_set_mode(mode, range);
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return status;
 }
 
@@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
        if (security_netlink_recv(skb, CAP_NET_ADMIN))
                RCV_SKB_FAIL(-EPERM);
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        if (peer_pid) {
                if (peer_pid != pid) {
-                       write_unlock_bh(&queue_lock);
+                       spin_unlock_bh(&queue_lock);
                        RCV_SKB_FAIL(-EBUSY);
                }
        } else {
@@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
                peer_pid = pid;
        }
 
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
 
        status = ipq_receive_peer(NLMSG_DATA(nlh), type,
                                  nlmsglen - NLMSG_LENGTH(0));
@@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
        struct netlink_notify *n = ptr;
 
        if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
-               write_lock_bh(&queue_lock);
+               spin_lock_bh(&queue_lock);
                if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
                        __ipq_reset();
-               write_unlock_bh(&queue_lock);
+               spin_unlock_bh(&queue_lock);
        }
        return NOTIFY_DONE;
 }
@@ -527,7 +520,7 @@ static ctl_table ipq_table[] = {
 #ifdef CONFIG_PROC_FS
 static int ip_queue_show(struct seq_file *m, void *v)
 {
-       read_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        seq_printf(m,
                      "Peer PID          : %d\n"
@@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v)
                      queue_dropped,
                      queue_user_dropped);
 
-       read_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return 0;
 }
 
index 4b6c5ca610fc0a463db4995e8b58393916e0a35c..b38c11810c65938513282a9e8a0699317dad231a 100644 (file)
@@ -928,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
           (other than comefrom, which userspace doesn't care
           about). */
        countersize = sizeof(struct xt_counters) * private->number;
-       counters = vmalloc_node(countersize, numa_node_id());
+       counters = vmalloc(countersize);
 
        if (counters == NULL)
                return ERR_PTR(-ENOMEM);
@@ -1352,7 +1352,7 @@ do_add_counters(struct net *net, const void __user *user,
        if (len != size + num_counters * sizeof(struct xt_counters))
                return -EINVAL;
 
-       paddc = vmalloc_node(len - size, numa_node_id());
+       paddc = vmalloc(len - size);
        if (!paddc)
                return -ENOMEM;
 
index f91c94b9a7900017b3589598a26c3491ef6a2407..64d0875f519245762748055f40be0cc291be7e3c 100644 (file)
@@ -53,12 +53,13 @@ struct clusterip_config {
 #endif
        enum clusterip_hashmode hash_mode;      /* which hashing mode */
        u_int32_t hash_initval;                 /* hash initialization */
+       struct rcu_head rcu;
 };
 
 static LIST_HEAD(clusterip_configs);
 
 /* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+static DEFINE_SPINLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
        atomic_inc(&c->refcount);
 }
 
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+       kfree(container_of(head, struct clusterip_config, rcu));
+}
+
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
        if (atomic_dec_and_test(&c->refcount))
-               kfree(c);
+               call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
 }
 
 /* decrease the count of entries using/referencing this config.  If last
@@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c)
 static inline void
 clusterip_config_entry_put(struct clusterip_config *c)
 {
-       write_lock_bh(&clusterip_lock);
-       if (atomic_dec_and_test(&c->entries)) {
-               list_del(&c->list);
-               write_unlock_bh(&clusterip_lock);
+       local_bh_disable();
+       if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
+               list_del_rcu(&c->list);
+               spin_unlock(&clusterip_lock);
+               local_bh_enable();
 
                dev_mc_del(c->dev, c->clustermac);
                dev_put(c->dev);
@@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 #endif
                return;
        }
-       write_unlock_bh(&clusterip_lock);
+       local_bh_enable();
 }
 
 static struct clusterip_config *
@@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip)
 {
        struct clusterip_config *c;
 
-       list_for_each_entry(c, &clusterip_configs, list) {
+       list_for_each_entry_rcu(c, &clusterip_configs, list) {
                if (c->clusterip == clusterip)
                        return c;
        }
@@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
 {
        struct clusterip_config *c;
 
-       read_lock_bh(&clusterip_lock);
+       rcu_read_lock_bh();
        c = __clusterip_config_find(clusterip);
-       if (!c) {
-               read_unlock_bh(&clusterip_lock);
-               return NULL;
+       if (c) {
+               if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+                       c = NULL;
+               else if (entry)
+                       atomic_inc(&c->entries);
        }
-       atomic_inc(&c->refcount);
-       if (entry)
-               atomic_inc(&c->entries);
-       read_unlock_bh(&clusterip_lock);
+       rcu_read_unlock_bh();
 
        return c;
 }
@@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
        }
 #endif
 
-       write_lock_bh(&clusterip_lock);
-       list_add(&c->list, &clusterip_configs);
-       write_unlock_bh(&clusterip_lock);
+       spin_lock_bh(&clusterip_lock);
+       list_add_rcu(&c->list, &clusterip_configs);
+       spin_unlock_bh(&clusterip_lock);
 
        return c;
 }
@@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void)
 #endif
        nf_unregister_hook(&cip_arp_ops);
        xt_unregister_target(&clusterip_tg_reg);
+
+       /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+       rcu_barrier_bh();
 }
 
 module_init(clusterip_tg_init);
index 4f8bddb760c9cb47d6931647d8b03ece5fef5fc0..c7719b283ada4c0e9628dbfd2fbc8a29e8da3fdd 100644 (file)
@@ -742,7 +742,7 @@ static int __init nf_nat_init(void)
        spin_unlock_bh(&nf_nat_lock);
 
        /* Initialize fake conntrack so that NAT will skip it */
-       nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+       nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
 
        l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
 
index beb25819c9c9f66c0fa5a1a40c398e860f3e10a8..6723c682250d64c640d0b36203f0189080bcfbc7 100644 (file)
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
                return NF_ACCEPT;
 
        /* Don't try to NAT if this packet is not conntracked */
-       if (ct == &nf_conntrack_untracked)
+       if (nf_ct_is_untracked(ct))
                return NF_ACCEPT;
 
        nat = nfct_nat(ct);
index a74951c039b6abdcc8844202b082eefd15da7da1..7155b2451d7cf297ab2b87d244e217ccf82f4071 100644 (file)
@@ -151,9 +151,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
                                                         protocol,
                                                         csum_sub(0, hsum)));
                skb->ip_summed = CHECKSUM_NONE;
-               csum = __skb_checksum_complete_head(skb, dataoff + len);
-               if (!csum)
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+               return __skb_checksum_complete_head(skb, dataoff + len);
        }
        return csum;
 };
index 8c201743d96d3fe8366603fc29d071c773c07166..413ab0754e1fe4923c139e632444f2cfcc623d86 100644 (file)
@@ -43,7 +43,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_RWLOCK(queue_lock);
+static DEFINE_SPINLOCK(queue_lock);
 static int peer_pid __read_mostly;
 static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
@@ -73,10 +73,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
                break;
 
        case IPQ_COPY_PACKET:
-               copy_mode = mode;
+               if (range > 0xFFFF)
+                       range = 0xFFFF;
                copy_range = range;
-               if (copy_range > 0xFFFF)
-                       copy_range = 0xFFFF;
+               copy_mode = mode;
                break;
 
        default:
@@ -102,7 +102,7 @@ ipq_find_dequeue_entry(unsigned long id)
 {
        struct nf_queue_entry *entry = NULL, *i;
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        list_for_each_entry(i, &queue_list, list) {
                if ((unsigned long)i == id) {
@@ -116,7 +116,7 @@ ipq_find_dequeue_entry(unsigned long id)
                queue_total--;
        }
 
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return entry;
 }
 
@@ -137,9 +137,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 static void
 ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
        __ipq_flush(cmpfn, data);
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
 }
 
 static struct sk_buff *
@@ -153,9 +153,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
        struct nlmsghdr *nlh;
        struct timeval tv;
 
-       read_lock_bh(&queue_lock);
-
-       switch (copy_mode) {
+       switch (ACCESS_ONCE(copy_mode)) {
        case IPQ_COPY_META:
        case IPQ_COPY_NONE:
                size = NLMSG_SPACE(sizeof(*pmsg));
@@ -163,26 +161,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 
        case IPQ_COPY_PACKET:
                if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
-                   (*errp = skb_checksum_help(entry->skb))) {
-                       read_unlock_bh(&queue_lock);
+                   (*errp = skb_checksum_help(entry->skb)))
                        return NULL;
-               }
-               if (copy_range == 0 || copy_range > entry->skb->len)
+
+               data_len = ACCESS_ONCE(copy_range);
+               if (data_len == 0 || data_len > entry->skb->len)
                        data_len = entry->skb->len;
-               else
-                       data_len = copy_range;
 
                size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
                break;
 
        default:
                *errp = -EINVAL;
-               read_unlock_bh(&queue_lock);
                return NULL;
        }
 
-       read_unlock_bh(&queue_lock);
-
        skb = alloc_skb(size, GFP_ATOMIC);
        if (!skb)
                goto nlmsg_failure;
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
        if (nskb == NULL)
                return status;
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        if (!peer_pid)
                goto err_out_free_nskb;
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
        __ipq_enqueue_entry(entry);
 
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return status;
 
 err_out_free_nskb:
        kfree_skb(nskb);
 
 err_out_unlock:
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return status;
 }
 
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
 {
        int status;
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
        status = __ipq_set_mode(mode, range);
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return status;
 }
 
@@ -441,11 +434,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
        if (security_netlink_recv(skb, CAP_NET_ADMIN))
                RCV_SKB_FAIL(-EPERM);
 
-       write_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        if (peer_pid) {
                if (peer_pid != pid) {
-                       write_unlock_bh(&queue_lock);
+                       spin_unlock_bh(&queue_lock);
                        RCV_SKB_FAIL(-EBUSY);
                }
        } else {
@@ -453,7 +446,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
                peer_pid = pid;
        }
 
-       write_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
 
        status = ipq_receive_peer(NLMSG_DATA(nlh), type,
                                  nlmsglen - NLMSG_LENGTH(0));
@@ -498,10 +491,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
        struct netlink_notify *n = ptr;
 
        if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
-               write_lock_bh(&queue_lock);
+               spin_lock_bh(&queue_lock);
                if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
                        __ipq_reset();
-               write_unlock_bh(&queue_lock);
+               spin_unlock_bh(&queue_lock);
        }
        return NOTIFY_DONE;
 }
@@ -528,7 +521,7 @@ static ctl_table ipq_table[] = {
 #ifdef CONFIG_PROC_FS
 static int ip6_queue_show(struct seq_file *m, void *v)
 {
-       read_lock_bh(&queue_lock);
+       spin_lock_bh(&queue_lock);
 
        seq_printf(m,
                      "Peer PID          : %d\n"
@@ -546,7 +539,7 @@ static int ip6_queue_show(struct seq_file *m, void *v)
                      queue_dropped,
                      queue_user_dropped);
 
-       read_unlock_bh(&queue_lock);
+       spin_unlock_bh(&queue_lock);
        return 0;
 }
 
index 9d2d68f0e6053d97fdfc3f350135604309a35808..dc41d6d3c6c6a6fb6e74b57fe12575362123aedf 100644 (file)
@@ -943,7 +943,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
           (other than comefrom, which userspace doesn't care
           about). */
        countersize = sizeof(struct xt_counters) * private->number;
-       counters = vmalloc_node(countersize, numa_node_id());
+       counters = vmalloc(countersize);
 
        if (counters == NULL)
                return ERR_PTR(-ENOMEM);
@@ -1213,8 +1213,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
        struct ip6t_entry *iter;
 
        ret = 0;
-       counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
-                               numa_node_id());
+       counters = vmalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
@@ -1368,7 +1367,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
        if (len != size + num_counters * sizeof(struct xt_counters))
                return -EINVAL;
 
-       paddc = vmalloc_node(len - size, numa_node_id());
+       paddc = vmalloc(len - size);
        if (!paddc)
                return -ENOMEM;
 
index 9be81776415ed67a4a09d536d742cf80e4ea7941..1df3c8b6bf4723668e6b8c43666b3e31f4583fd8 100644 (file)
@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
        type = icmp6h->icmp6_type - 130;
        if (type >= 0 && type < sizeof(noct_valid_new) &&
            noct_valid_new[type]) {
-               skb->nfct = &nf_conntrack_untracked.ct_general;
+               skb->nfct = &nf_ct_untracked_get()->ct_general;
                skb->nfctinfo = IP_CT_NEW;
                nf_conntrack_get(skb->nfct);
                return NF_ACCEPT;
index 6fb890187de09b01f8dc6453b7862a70de2edb8f..9254008602d4901d0d6f6b6813c439b1e7ef84db 100644 (file)
@@ -114,10 +114,8 @@ static void nf_skb_free(struct sk_buff *skb)
 }
 
 /* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
+static void frag_kfree_skb(struct sk_buff *skb)
 {
-       if (work)
-               *work -= skb->truesize;
        atomic_sub(skb->truesize, &nf_init_frags.mem);
        nf_skb_free(skb);
        kfree_skb(skb);
@@ -335,7 +333,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
                                fq->q.fragments = next;
 
                        fq->q.meat -= free_it->len;
-                       frag_kfree_skb(free_it, NULL);
+                       frag_kfree_skb(free_it);
                }
        }
 
@@ -442,7 +440,6 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
        skb_shinfo(head)->frag_list = head->next;
        skb_reset_transport_header(head);
        skb_push(head, head->data - skb_network_header(head));
-       atomic_sub(head->truesize, &nf_init_frags.mem);
 
        for (fp=head->next; fp; fp = fp->next) {
                head->data_len += fp->len;
@@ -452,8 +449,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
                else if (head->ip_summed == CHECKSUM_COMPLETE)
                        head->csum = csum_add(head->csum, fp->csum);
                head->truesize += fp->truesize;
-               atomic_sub(fp->truesize, &nf_init_frags.mem);
        }
+       atomic_sub(head->truesize, &nf_init_frags.mem);
 
        head->next = NULL;
        head->dev = dev;
index 8593a77cfea906ac0257405fb8aa9ae0e0bef597..413ed24a968a6f392f55b4d597cda2232beb95ed 100644 (file)
@@ -424,6 +424,18 @@ config NETFILTER_XT_TARGET_HL
        since you can easily create immortal packets that loop
        forever on the network.
 
+config NETFILTER_XT_TARGET_IDLETIMER
+       tristate  "IDLETIMER target support"
+       depends on NETFILTER_ADVANCED
+       help
+
+         This option adds the `IDLETIMER' target.  Each matching packet
+         resets the timer associated with label specified when the rule is
+         added.  When the timer expires, it triggers a sysfs notification.
+         The remaining time for expiration can be read via sysfs.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_LED
        tristate '"LED" target support'
        depends on LEDS_CLASS && LEDS_TRIGGERS
index 14e3a8fd81803fd260f3af6580bf4e2567b30506..e28420aac5efa793196b2ae77698b4cfefb8d9b5 100644 (file)
@@ -61,6 +61,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
index 77288980fae017d8694df9d4c5efccd8b18d23b4..16b41b4e2a3c18a4b7979f19d51a91a9427761ae 100644 (file)
@@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
+DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
@@ -1181,10 +1181,21 @@ static void nf_ct_release_dying_list(struct net *net)
        spin_unlock_bh(&nf_conntrack_lock);
 }
 
+static int untrack_refs(void)
+{
+       int cnt = 0, cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+               cnt += atomic_read(&ct->ct_general.use) - 1;
+       }
+       return cnt;
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
-       /* wait until all references to nf_conntrack_untracked are dropped */
-       while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+       while (untrack_refs() > 0)
                schedule();
 
        nf_conntrack_helper_fini();
@@ -1319,10 +1330,19 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
                  &nf_conntrack_htable_size, 0600);
 
+void nf_ct_untracked_status_or(unsigned long bits)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               per_cpu(nf_conntrack_untracked, cpu).status |= bits;
+}
+EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
+
 static int nf_conntrack_init_init_net(void)
 {
        int max_factor = 8;
-       int ret;
+       int ret, cpu;
 
        /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
         * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1361,11 +1381,13 @@ static int nf_conntrack_init_init_net(void)
                goto err_extend;
 #endif
        /* Set up fake conntrack: to never be deleted, not in any hashes */
-       write_pnet(&nf_conntrack_untracked.ct_net, &init_net);
-       atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+       for_each_possible_cpu(cpu) {
+               struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+               write_pnet(&ct->ct_net, &init_net);
+               atomic_set(&ct->ct_general.use, 1);
+       }
        /*  - and look it like as a confirmed connection */
-       set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-
+       nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
        return 0;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
index c42ff6aa441d657c03e2eb0207350e50b5adc239..5bae1cd15eea93ee3f74cb51dab972c10c96d33c 100644 (file)
@@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
        int err;
 
        /* ignore our fake conntrack entry */
-       if (ct == &nf_conntrack_untracked)
+       if (nf_ct_is_untracked(ct))
                return 0;
 
        if (events & (1 << IPCT_DESTROY)) {
index fc9a211e629e499bbc15eed8f593157a284cc1b7..fb86a51bb65aa04e553c9ffa4c26331a2d7512fa 100644 (file)
@@ -66,9 +66,10 @@ struct nfulnl_instance {
        u_int16_t group_num;            /* number of this queue */
        u_int16_t flags;
        u_int8_t copy_mode;
+       struct rcu_head rcu;
 };
 
-static DEFINE_RWLOCK(instances_lock);
+static DEFINE_SPINLOCK(instances_lock);
 static atomic_t global_seq;
 
 #define INSTANCE_BUCKETS       16
@@ -88,7 +89,7 @@ __instance_lookup(u_int16_t group_num)
        struct nfulnl_instance *inst;
 
        head = &instance_table[instance_hashfn(group_num)];
-       hlist_for_each_entry(inst, pos, head, hlist) {
+       hlist_for_each_entry_rcu(inst, pos, head, hlist) {
                if (inst->group_num == group_num)
                        return inst;
        }
@@ -106,22 +107,26 @@ instance_lookup_get(u_int16_t group_num)
 {
        struct nfulnl_instance *inst;
 
-       read_lock_bh(&instances_lock);
+       rcu_read_lock_bh();
        inst = __instance_lookup(group_num);
-       if (inst)
-               instance_get(inst);
-       read_unlock_bh(&instances_lock);
+       if (inst && !atomic_inc_not_zero(&inst->use))
+               inst = NULL;
+       rcu_read_unlock_bh();
 
        return inst;
 }
 
+static void nfulnl_instance_free_rcu(struct rcu_head *head)
+{
+       kfree(container_of(head, struct nfulnl_instance, rcu));
+       module_put(THIS_MODULE);
+}
+
 static void
 instance_put(struct nfulnl_instance *inst)
 {
-       if (inst && atomic_dec_and_test(&inst->use)) {
-               kfree(inst);
-               module_put(THIS_MODULE);
-       }
+       if (inst && atomic_dec_and_test(&inst->use))
+               call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
 }
 
 static void nfulnl_timer(unsigned long data);
@@ -132,7 +137,7 @@ instance_create(u_int16_t group_num, int pid)
        struct nfulnl_instance *inst;
        int err;
 
-       write_lock_bh(&instances_lock);
+       spin_lock_bh(&instances_lock);
        if (__instance_lookup(group_num)) {
                err = -EEXIST;
                goto out_unlock;
@@ -166,32 +171,37 @@ instance_create(u_int16_t group_num, int pid)
        inst->copy_mode         = NFULNL_COPY_PACKET;
        inst->copy_range        = NFULNL_COPY_RANGE_MAX;
 
-       hlist_add_head(&inst->hlist,
+       hlist_add_head_rcu(&inst->hlist,
                       &instance_table[instance_hashfn(group_num)]);
 
-       write_unlock_bh(&instances_lock);
+       spin_unlock_bh(&instances_lock);
 
        return inst;
 
 out_unlock:
-       write_unlock_bh(&instances_lock);
+       spin_unlock_bh(&instances_lock);
        return ERR_PTR(err);
 }
 
 static void __nfulnl_flush(struct nfulnl_instance *inst);
 
+/* called with BH disabled */
 static void
 __instance_destroy(struct nfulnl_instance *inst)
 {
        /* first pull it out of the global list */
-       hlist_del(&inst->hlist);
+       hlist_del_rcu(&inst->hlist);
 
        /* then flush all pending packets from skb */
 
-       spin_lock_bh(&inst->lock);
+       spin_lock(&inst->lock);
+
+       /* lockless readers won't be able to use us */
+       inst->copy_mode = NFULNL_COPY_DISABLED;
+
        if (inst->skb)
                __nfulnl_flush(inst);
-       spin_unlock_bh(&inst->lock);
+       spin_unlock(&inst->lock);
 
        /* and finally put the refcount */
        instance_put(inst);
@@ -200,9 +210,9 @@ __instance_destroy(struct nfulnl_instance *inst)
 static inline void
 instance_destroy(struct nfulnl_instance *inst)
 {
-       write_lock_bh(&instances_lock);
+       spin_lock_bh(&instances_lock);
        __instance_destroy(inst);
-       write_unlock_bh(&instances_lock);
+       spin_unlock_bh(&instances_lock);
 }
 
 static int
@@ -619,6 +629,7 @@ nfulnl_log_packet(u_int8_t pf,
                size += nla_total_size(data_len);
                break;
 
+       case NFULNL_COPY_DISABLED:
        default:
                goto unlock_and_release;
        }
@@ -672,7 +683,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
                int i;
 
                /* destroy all instances for this pid */
-               write_lock_bh(&instances_lock);
+               spin_lock_bh(&instances_lock);
                for  (i = 0; i < INSTANCE_BUCKETS; i++) {
                        struct hlist_node *tmp, *t2;
                        struct nfulnl_instance *inst;
@@ -684,7 +695,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
                                        __instance_destroy(inst);
                        }
                }
-               write_unlock_bh(&instances_lock);
+               spin_unlock_bh(&instances_lock);
        }
        return NOTIFY_DONE;
 }
@@ -861,19 +872,19 @@ static struct hlist_node *get_first(struct iter_state *st)
 
        for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
                if (!hlist_empty(&instance_table[st->bucket]))
-                       return instance_table[st->bucket].first;
+                       return rcu_dereference_bh(instance_table[st->bucket].first);
        }
        return NULL;
 }
 
 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-       h = h->next;
+       h = rcu_dereference_bh(h->next);
        while (!h) {
                if (++st->bucket >= INSTANCE_BUCKETS)
                        return NULL;
 
-               h = instance_table[st->bucket].first;
+               h = rcu_dereference_bh(instance_table[st->bucket].first);
        }
        return h;
 }
@@ -890,9 +901,9 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
 }
 
 static void *seq_start(struct seq_file *seq, loff_t *pos)
-       __acquires(instances_lock)
+       __acquires(rcu_bh)
 {
-       read_lock_bh(&instances_lock);
+       rcu_read_lock_bh();
        return get_idx(seq->private, *pos);
 }
 
@@ -903,9 +914,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 }
 
 static void seq_stop(struct seq_file *s, void *v)
-       __releases(instances_lock)
+       __releases(rcu_bh)
 {
-       read_unlock_bh(&instances_lock);
+       rcu_read_unlock_bh();
 }
 
 static int seq_show(struct seq_file *s, void *v)
index 12e1ab37fcd8f400151f8bced1b13a873b328e77..d05605b38f6f60fd5e6a6675abc7e11a1ebe768a 100644 (file)
@@ -46,17 +46,19 @@ struct nfqnl_instance {
        int peer_pid;
        unsigned int queue_maxlen;
        unsigned int copy_range;
-       unsigned int queue_total;
        unsigned int queue_dropped;
        unsigned int queue_user_dropped;
 
-       unsigned int id_sequence;               /* 'sequence' of pkt ids */
 
        u_int16_t queue_num;                    /* number of this queue */
        u_int8_t copy_mode;
-
-       spinlock_t lock;
-
+/*
+ * Following fields are dirtied for each queued packet,
+ * keep them in same cache line if possible.
+ */
+       spinlock_t      lock;
+       unsigned int    queue_total;
+       atomic_t        id_sequence;            /* 'sequence' of pkt ids */
        struct list_head queue_list;            /* packets in queue */
 };
 
@@ -238,32 +240,24 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
        outdev = entry->outdev;
 
-       spin_lock_bh(&queue->lock);
-
-       switch ((enum nfqnl_config_mode)queue->copy_mode) {
+       switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
        case NFQNL_COPY_META:
        case NFQNL_COPY_NONE:
                break;
 
        case NFQNL_COPY_PACKET:
                if (entskb->ip_summed == CHECKSUM_PARTIAL &&
-                   skb_checksum_help(entskb)) {
-                       spin_unlock_bh(&queue->lock);
+                   skb_checksum_help(entskb))
                        return NULL;
-               }
-               if (queue->copy_range == 0
-                   || queue->copy_range > entskb->len)
+
+               data_len = ACCESS_ONCE(queue->copy_range);
+               if (data_len == 0 || data_len > entskb->len)
                        data_len = entskb->len;
-               else
-                       data_len = queue->copy_range;
 
                size += nla_total_size(data_len);
                break;
        }
 
-       entry->id = queue->id_sequence++;
-
-       spin_unlock_bh(&queue->lock);
 
        skb = alloc_skb(size, GFP_ATOMIC);
        if (!skb)
@@ -278,6 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
        nfmsg->version = NFNETLINK_V0;
        nfmsg->res_id = htons(queue->queue_num);
 
+       entry->id = atomic_inc_return(&queue->id_sequence);
        pmsg.packet_id          = htonl(entry->id);
        pmsg.hw_protocol        = entskb->protocol;
        pmsg.hook               = entry->hook;
@@ -866,7 +861,7 @@ static int seq_show(struct seq_file *s, void *v)
                          inst->peer_pid, inst->queue_total,
                          inst->copy_mode, inst->copy_range,
                          inst->queue_dropped, inst->queue_user_dropped,
-                         inst->id_sequence, 1);
+                         atomic_read(&inst->id_sequence), 1);
 }
 
 static const struct seq_operations nfqnl_seq_ops = {
index 562bf3266e043d421621ee7472aad72b50b0da51..0cb6053f02fdf04723254bfe90d8ca9bff29edfc 100644 (file)
@@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
                return -EINVAL;
 
        if (info->flags & XT_CT_NOTRACK) {
-               ct = &nf_conntrack_untracked;
+               ct = nf_ct_untracked_get();
                atomic_inc(&ct->ct_general.use);
                goto out;
        }
@@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
        struct nf_conn *ct = info->ct;
        struct nf_conn_help *help;
 
-       if (ct != &nf_conntrack_untracked) {
+       if (!nf_ct_is_untracked(ct)) {
                help = nfct_help(ct);
                if (help)
                        module_put(help->helper->me);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
new file mode 100644 (file)
index 0000000..e11090a
--- /dev/null
@@ -0,0 +1,314 @@
+/*
+ * linux/net/netfilter/xt_IDLETIMER.c
+ *
+ * Netfilter module to trigger a timer when packet matches.
+ * After timer expires a kevent will be sent.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and reworked for upstream inclusion
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_IDLETIMER.h>
+#include <linux/kobject.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+
+struct idletimer_tg_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct kobject *kobj,
+                       struct attribute *attr, char *buf);
+};
+
+struct idletimer_tg {
+       struct list_head entry;
+       struct timer_list timer;
+       struct work_struct work;
+
+       struct kobject *kobj;
+       struct idletimer_tg_attr attr;
+
+       unsigned int refcnt;
+};
+
+static LIST_HEAD(idletimer_tg_list);
+static DEFINE_MUTEX(list_mutex);
+
+static struct kobject *idletimer_tg_kobj;
+
+static
+struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
+{
+       struct idletimer_tg *entry;
+
+       BUG_ON(!label);
+
+       list_for_each_entry(entry, &idletimer_tg_list, entry) {
+               if (!strcmp(label, entry->attr.attr.name))
+                       return entry;
+       }
+
+       return NULL;
+}
+
+static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
+                                char *buf)
+{
+       struct idletimer_tg *timer;
+       unsigned long expires = 0;
+
+       mutex_lock(&list_mutex);
+
+       timer = __idletimer_tg_find_by_label(attr->name);
+       if (timer)
+               expires = timer->timer.expires;
+
+       mutex_unlock(&list_mutex);
+
+       if (time_after(expires, jiffies))
+               return sprintf(buf, "%u\n",
+                              jiffies_to_msecs(expires - jiffies) / 1000);
+
+       return sprintf(buf, "0\n");
+}
+
+static void idletimer_tg_work(struct work_struct *work)
+{
+       struct idletimer_tg *timer = container_of(work, struct idletimer_tg,
+                                                 work);
+
+       sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+}
+
+static void idletimer_tg_expired(unsigned long data)
+{
+       struct idletimer_tg *timer = (struct idletimer_tg *) data;
+
+       pr_debug("timer %s expired\n", timer->attr.attr.name);
+
+       schedule_work(&timer->work);
+}
+
+static int idletimer_tg_create(struct idletimer_tg_info *info)
+{
+       int ret;
+
+       info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+       if (!info->timer) {
+               pr_debug("couldn't alloc timer\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
+       if (!info->timer->attr.attr.name) {
+               pr_debug("couldn't alloc attribute name\n");
+               ret = -ENOMEM;
+               goto out_free_timer;
+       }
+       info->timer->attr.attr.mode = S_IRUGO;
+       info->timer->attr.show = idletimer_tg_show;
+
+       ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
+       if (ret < 0) {
+               pr_debug("couldn't add file to sysfs");
+               goto out_free_attr;
+       }
+
+       list_add(&info->timer->entry, &idletimer_tg_list);
+
+       setup_timer(&info->timer->timer, idletimer_tg_expired,
+                   (unsigned long) info->timer);
+       info->timer->refcnt = 1;
+
+       mod_timer(&info->timer->timer,
+                 msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+       INIT_WORK(&info->timer->work, idletimer_tg_work);
+
+       return 0;
+
+out_free_attr:
+       kfree(info->timer->attr.attr.name);
+out_free_timer:
+       kfree(info->timer);
+out:
+       return ret;
+}
+
+/*
+ * The actual xt_tables plugin.
+ */
+static unsigned int idletimer_tg_target(struct sk_buff *skb,
+                                        const struct xt_action_param *par)
+{
+       const struct idletimer_tg_info *info = par->targinfo;
+
+       pr_debug("resetting timer %s, timeout period %u\n",
+                info->label, info->timeout);
+
+       BUG_ON(!info->timer);
+
+       mod_timer(&info->timer->timer,
+                 msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+       return XT_CONTINUE;
+}
+
+static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
+{
+       struct idletimer_tg_info *info = par->targinfo;
+       int ret;
+
+       pr_debug("checkentry targinfo%s\n", info->label);
+
+       if (info->timeout == 0) {
+               pr_debug("timeout value is zero\n");
+               return -EINVAL;
+       }
+
+       if (info->label[0] == '\0' ||
+           strnlen(info->label,
+                   MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
+               pr_debug("label is empty or not nul-terminated\n");
+               return -EINVAL;
+       }
+
+       mutex_lock(&list_mutex);
+
+       info->timer = __idletimer_tg_find_by_label(info->label);
+       if (info->timer) {
+               info->timer->refcnt++;
+               mod_timer(&info->timer->timer,
+                         msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+               pr_debug("increased refcnt of timer %s to %u\n",
+                        info->label, info->timer->refcnt);
+       } else {
+               ret = idletimer_tg_create(info);
+               if (ret < 0) {
+                       pr_debug("failed to create timer\n");
+                       mutex_unlock(&list_mutex);
+                       return ret;
+               }
+       }
+
+       mutex_unlock(&list_mutex);
+       return 0;
+}
+
+static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
+{
+       const struct idletimer_tg_info *info = par->targinfo;
+
+       pr_debug("destroy targinfo %s\n", info->label);
+
+       mutex_lock(&list_mutex);
+
+       if (--info->timer->refcnt == 0) {
+               pr_debug("deleting timer %s\n", info->label);
+
+               list_del(&info->timer->entry);
+               del_timer_sync(&info->timer->timer);
+               sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+               kfree(info->timer->attr.attr.name);
+               kfree(info->timer);
+       } else {
+               pr_debug("decreased refcnt of timer %s to %u\n",
+                        info->label, info->timer->refcnt);
+       }
+
+       mutex_unlock(&list_mutex);
+}
+
+static struct xt_target idletimer_tg __read_mostly = {
+       .name           = "IDLETIMER",
+       .family         = NFPROTO_UNSPEC,
+       .target         = idletimer_tg_target,
+       .targetsize     = sizeof(struct idletimer_tg_info),
+       .checkentry     = idletimer_tg_checkentry,
+       .destroy        = idletimer_tg_destroy,
+       .me             = THIS_MODULE,
+};
+
+static struct class *idletimer_tg_class;
+
+static struct device *idletimer_tg_device;
+
+static int __init idletimer_tg_init(void)
+{
+       int err;
+
+       idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer");
+       err = PTR_ERR(idletimer_tg_class);
+       if (IS_ERR(idletimer_tg_class)) {
+               pr_debug("couldn't register device class\n");
+               goto out;
+       }
+
+       idletimer_tg_device = device_create(idletimer_tg_class, NULL,
+                                           MKDEV(0, 0), NULL, "timers");
+       err = PTR_ERR(idletimer_tg_device);
+       if (IS_ERR(idletimer_tg_device)) {
+               pr_debug("couldn't register system device\n");
+               goto out_class;
+       }
+
+       idletimer_tg_kobj = &idletimer_tg_device->kobj;
+
+       err =  xt_register_target(&idletimer_tg);
+       if (err < 0) {
+               pr_debug("couldn't register xt target\n");
+               goto out_dev;
+       }
+
+       return 0;
+out_dev:
+       device_destroy(idletimer_tg_class, MKDEV(0, 0));
+out_class:
+       class_destroy(idletimer_tg_class);
+out:
+       return err;
+}
+
+static void __exit idletimer_tg_exit(void)
+{
+       xt_unregister_target(&idletimer_tg);
+
+       device_destroy(idletimer_tg_class, MKDEV(0, 0));
+       class_destroy(idletimer_tg_class);
+}
+
+module_init(idletimer_tg_init);
+module_exit(idletimer_tg_exit);
+
+MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
+MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
+MODULE_DESCRIPTION("Xtables: idle time monitor");
+MODULE_LICENSE("GPL v2");
index 512b9123252f0a0caa625a0a43cead965865864c..9d782181b6c8993236cec126c131949cbd3ee0a2 100644 (file)
@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
           If there is a real ct entry correspondig to this packet,
           it'll hang aroun till timing out. We don't deal with it
           for performance reasons. JK */
-       skb->nfct = &nf_conntrack_untracked.ct_general;
+       skb->nfct = &nf_ct_untracked_get()->ct_general;
        skb->nfctinfo = IP_CT_NEW;
        nf_conntrack_get(skb->nfct);
 
index c77a85bbd9ebc4b6bd10e95ebd19c663ab2ee9ef..22a2d421e7ebc5172761d51caea4298d47d7ebfb 100644 (file)
@@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 #ifdef WITH_CONNTRACK
        /* Avoid counting cloned packets towards the original connection. */
        nf_conntrack_put(skb->nfct);
-       skb->nfct     = &nf_conntrack_untracked.ct_general;
+       skb->nfct     = &nf_ct_untracked_get()->ct_general;
        skb->nfctinfo = IP_CT_NEW;
        nf_conntrack_get(skb->nfct);
 #endif
@@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 #ifdef WITH_CONNTRACK
        nf_conntrack_put(skb->nfct);
-       skb->nfct     = &nf_conntrack_untracked.ct_general;
+       skb->nfct     = &nf_ct_untracked_get()->ct_general;
        skb->nfctinfo = IP_CT_NEW;
        nf_conntrack_get(skb->nfct);
 #endif
index 30b95a1c1c892da3018bbf9084e4fe3a8d59b4b1..f4af1bfafb1c61642ddb56ac490f7e161df5d39d 100644 (file)
@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
        if (ct == NULL)
                return false;
 
-       if (ct == &nf_conntrack_untracked)
+       if (nf_ct_is_untracked(ct))
                return false;
 
        if (ct->master)
index 39681f10291c13a8a72efbfebad857ca10a7d750..e536710ad916246a4f558eaad02092d4c81168df 100644 (file)
@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 
        ct = nf_ct_get(skb, &ctinfo);
 
-       if (ct == &nf_conntrack_untracked)
-               statebit = XT_CONNTRACK_STATE_UNTRACKED;
-       else if (ct != NULL)
-               statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
-       else
+       if (ct) {
+               if (nf_ct_is_untracked(ct))
+                       statebit = XT_CONNTRACK_STATE_UNTRACKED;
+               else
+                       statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+       } else
                statebit = XT_CONNTRACK_STATE_INVALID;
 
        if (info->match_flags & XT_CONNTRACK_STATE) {
index c04fcf385c591875ec45f2f50812b9c564657869..ef36a56a02c6881c58296b2bf45c4b99d3836456 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/sctp/sctp.h>
 #include <linux/sctp.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -67,7 +68,7 @@ match_packet(const struct sk_buff *skb,
                         ++i, offset, sch->type, htons(sch->length),
                         sch->flags);
 #endif
-               offset += (ntohs(sch->length) + 3) & ~3;
+               offset += WORD_ROUND(ntohs(sch->length));
 
                pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
 
index 3d54c236a1ba0643ba0077eee8caf4f93078155f..1ca89908cbad84d2dc45093aa474cca056f239e3 100644 (file)
@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
         * reply packet of an established SNAT-ted connection. */
 
        ct = nf_ct_get(skb, &ctinfo);
-       if (ct && (ct != &nf_conntrack_untracked) &&
+       if (ct && !nf_ct_is_untracked(ct) &&
            ((iph->protocol != IPPROTO_ICMP &&
              ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
             (iph->protocol == IPPROTO_ICMP &&
index e12e053d3782ba8f9252a5b5b8a2d3085b2345c3..a507922d80cdc2854a141acde78e5410bf918a48 100644 (file)
@@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
        const struct xt_state_info *sinfo = par->matchinfo;
        enum ip_conntrack_info ctinfo;
        unsigned int statebit;
+       struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
-       if (nf_ct_is_untracked(skb))
-               statebit = XT_STATE_UNTRACKED;
-       else if (!nf_ct_get(skb, &ctinfo))
+       if (!ct)
                statebit = XT_STATE_INVALID;
-       else
-               statebit = XT_STATE_BIT(ctinfo);
-
+       else {
+               if (nf_ct_is_untracked(ct))
+                       statebit = XT_STATE_UNTRACKED;
+               else
+                       statebit = XT_STATE_BIT(ctinfo);
+       }
        return (sinfo->statemask & statebit);
 }
 
index 96e62b8fd6b10d063f7bdc4cc2ee0ba28ed9991a..42ecb71d445fe6d009ac252d1b030cd86befcfb1 100644 (file)
@@ -18,8 +18,8 @@
 #include <linux/netfilter/x_tables.h>
 
 struct xt_statistic_priv {
-       uint32_t count;
-};
+       atomic_t count;
+} ____cacheline_aligned_in_smp;
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
@@ -27,13 +27,12 @@ MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
 MODULE_ALIAS("ipt_statistic");
 MODULE_ALIAS("ip6t_statistic");
 
-static DEFINE_SPINLOCK(nth_lock);
-
 static bool
 statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_statistic_info *info = par->matchinfo;
        bool ret = info->flags & XT_STATISTIC_INVERT;
+       int nval, oval;
 
        switch (info->mode) {
        case XT_STATISTIC_MODE_RANDOM:
@@ -41,12 +40,12 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
                        ret = !ret;
                break;
        case XT_STATISTIC_MODE_NTH:
-               spin_lock_bh(&nth_lock);
-               if (info->master->count++ == info->u.nth.every) {
-                       info->master->count = 0;
+               do {
+                       oval = atomic_read(&info->master->count);
+                       nval = (oval == info->u.nth.every) ? 0 : oval + 1;
+               } while (atomic_cmpxchg(&info->master->count, oval, nval) != oval);
+               if (nval == 0)
                        ret = !ret;
-               }
-               spin_unlock_bh(&nth_lock);
                break;
        }
 
@@ -64,7 +63,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par)
        info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
        if (info->master == NULL)
                return -ENOMEM;
-       info->master->count = info->u.nth.count;
+       atomic_set(&info->master->count, info->u.nth.count);
 
        return 0;
 }