]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/netfilter/ip_tables.c
f92818f766710da455c9dd1e0b7ab0a1c2520dc6
[net-next-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 #include <linux/cache.h>
13 #include <linux/capability.h>
14 #include <linux/skbuff.h>
15 #include <linux/kmod.h>
16 #include <linux/vmalloc.h>
17 #include <linux/netdevice.h>
18 #include <linux/module.h>
19 #include <linux/icmp.h>
20 #include <net/ip.h>
21 #include <net/compat.h>
22 #include <asm/uaccess.h>
23 #include <linux/mutex.h>
24 #include <linux/proc_fs.h>
25 #include <linux/err.h>
26 #include <linux/cpumask.h>
27
28 #include <linux/netfilter/x_tables.h>
29 #include <linux/netfilter_ipv4/ip_tables.h>
30 #include <net/netfilter/nf_log.h>
31 #include "../../netfilter/xt_repldata.h"
32
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35 MODULE_DESCRIPTION("IPv4 packet filter");
36
37 /*#define DEBUG_IP_FIREWALL*/
38 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
39 /*#define DEBUG_IP_FIREWALL_USER*/
40
41 #ifdef DEBUG_IP_FIREWALL
42 #define dprintf(format, args...) pr_info(format , ## args)
43 #else
44 #define dprintf(format, args...)
45 #endif
46
47 #ifdef DEBUG_IP_FIREWALL_USER
48 #define duprintf(format, args...) pr_info(format , ## args)
49 #else
50 #define duprintf(format, args...)
51 #endif
52
53 #ifdef CONFIG_NETFILTER_DEBUG
54 #define IP_NF_ASSERT(x)                                         \
55 do {                                                            \
56         if (!(x))                                               \
57                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
58                        __func__, __FILE__, __LINE__);   \
59 } while(0)
60 #else
61 #define IP_NF_ASSERT(x)
62 #endif
63
64 #if 0
65 /* All the better to debug you with... */
66 #define static
67 #define inline
68 #endif
69
/* Allocate a blank initial table blob (built-in chains with default
 * policies only) ready for registration.  The ipt/IPT tokens select
 * the IPv4 flavour of the xt_repldata.h template macro. */
void *ipt_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ipt, IPT);
}
EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
75
76 /*
77    We keep a set of rules for each CPU, so we can avoid write-locking
78    them in the softirq when updating the counters and therefore
79    only need to read-lock in the softirq; doing a write_lock_bh() in user
80    context stops packets coming through and allows user context to read
81    the counters or update the rules.
82
83    Hence the start of any table is given by get_table() below.  */
84
/* Returns whether matches rule or not. */
/* Performance critical - called for every packet */
/* Does the packet's IP header (plus in/out device names and fragment
 * status) satisfy the header part of one rule?  Every test honours
 * the corresponding IPT_INV_* inversion flag. */
static inline bool
ip_packet_match(const struct iphdr *ip,
                const char *indev,
                const char *outdev,
                const struct ipt_ip *ipinfo,
                int isfrag)
{
        unsigned long ret;

/* Evaluate a condition, XOR-ing in the rule's inversion flag. */
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))

        /* Source/destination address under the rule's netmask. */
        if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
                  IPT_INV_SRCIP) ||
            FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
                  IPT_INV_DSTIP)) {
                dprintf("Source or dest mismatch.\n");

                dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
                        &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
                        ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
                dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
                        &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
                        ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
                return false;
        }

        /* Interface names compared as long-aligned words; nonzero
         * result means mismatch under the rule's wildcard mask. */
        ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);

        if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
                dprintf("VIA in mismatch (%s vs %s).%s\n",
                        indev, ipinfo->iniface,
                        ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
                return false;
        }

        ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);

        if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
                dprintf("VIA out mismatch (%s vs %s).%s\n",
                        outdev, ipinfo->outiface,
                        ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
                return false;
        }

        /* Check specific protocol */
        if (ipinfo->proto &&
            FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
                dprintf("Packet protocol %hi does not match %hi.%s\n",
                        ip->protocol, ipinfo->proto,
                        ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
                return false;
        }

        /* If we have a fragment rule but the packet is not a fragment
         * then we return zero */
        if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
                dprintf("Fragment rule but not fragment.%s\n",
                        ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
                return false;
        }

        return true;
}
150
151 static bool
152 ip_checkentry(const struct ipt_ip *ip)
153 {
154         if (ip->flags & ~IPT_F_MASK) {
155                 duprintf("Unknown flag bits set: %08X\n",
156                          ip->flags & ~IPT_F_MASK);
157                 return false;
158         }
159         if (ip->invflags & ~IPT_INV_MASK) {
160                 duprintf("Unknown invflag bits set: %08X\n",
161                          ip->invflags & ~IPT_INV_MASK);
162                 return false;
163         }
164         return true;
165 }
166
167 static unsigned int
168 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
169 {
170         if (net_ratelimit())
171                 pr_info("error: `%s'\n", (const char *)par->targinfo);
172
173         return NF_DROP;
174 }
175
176 /* Performance critical - called for every packet */
177 static inline bool
178 do_match(const struct ipt_entry_match *m, const struct sk_buff *skb,
179          struct xt_match_param *par)
180 {
181         par->match     = m->u.kernel.match;
182         par->matchinfo = m->data;
183
184         /* Stop iteration if it doesn't match */
185         if (!m->u.kernel.match->match(skb, par))
186                 return true;
187         else
188                 return false;
189 }
190
/* Performance critical.
 * Rules live in one flat blob; resolve a byte offset into an entry
 * pointer. */
static inline struct ipt_entry *
get_entry(const void *base, unsigned int offset)
{
        const void *addr = base + offset;

        return (struct ipt_entry *)addr;
}
197
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
static inline bool unconditional(const struct ipt_ip *ip)
{
        /* A rule is unconditional iff its entire IP-header part
         * (addresses, masks, interfaces, proto, flags) is zero. */
        static const struct ipt_ip uncond;

        return memcmp(ip, &uncond, sizeof(uncond)) == 0;
#undef FWINV	/* end of FWINV's scope (defined in ip_packet_match) */
}
207
/* for const-correctness */
static inline const struct ipt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
        /* ipt_get_target() takes a non-const entry; casting away const
         * here is safe because callers only read via the result. */
        return ipt_get_target((struct ipt_entry *)e);
}
214
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* Human-readable names of the IPv4 hook points, indexed by hook
 * number; used in TRACE log lines. */
static const char *const hooknames[] = {
        [NF_INET_PRE_ROUTING]           = "PREROUTING",
        [NF_INET_LOCAL_IN]              = "INPUT",
        [NF_INET_FORWARD]               = "FORWARD",
        [NF_INET_LOCAL_OUT]             = "OUTPUT",
        [NF_INET_POST_ROUTING]          = "POSTROUTING",
};

/* How the traced rule terminated the chain walk. */
enum nf_ip_trace_comments {
        NF_IP_TRACE_COMMENT_RULE,
        NF_IP_TRACE_COMMENT_RETURN,
        NF_IP_TRACE_COMMENT_POLICY,
};

static const char *const comments[] = {
        [NF_IP_TRACE_COMMENT_RULE]      = "rule",
        [NF_IP_TRACE_COMMENT_RETURN]    = "return",
        [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
};

/* Log parameters for TRACE output: level 4, all log options set. */
static struct nf_loginfo trace_loginfo = {
        .type = NF_LOG_TYPE_LOG,
        .u = {
                .log = {
                        .level = 4,
                        .logflags = NF_LOG_MASK,
                },
        },
};
246
/* Mildly perf critical (only if packet tracing is on) */
/* Walk helper for trace_packet(): invoked on each rule 's' until the
 * matched rule 'e' is reached.  Maintains the current user-chain name
 * (ERROR targets head user chains) and a 1-based rule number, and on
 * reaching 'e' classifies it as a plain rule, a chain policy, or an
 * implicit return.  Returns 1 to stop the walk, 0 to continue. */
static inline int
get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
                      const char *hookname, const char **chainname,
                      const char **comment, unsigned int *rulenum)
{
        const struct ipt_standard_target *t = (void *)ipt_get_target_c(s);

        if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
                /* Head of user chain: ERROR target with chainname */
                *chainname = t->target.data;
                (*rulenum) = 0;
        } else if (s == e) {
                (*rulenum)++;

                if (s->target_offset == sizeof(struct ipt_entry) &&
                    strcmp(t->target.u.kernel.target->name,
                           IPT_STANDARD_TARGET) == 0 &&
                   t->verdict < 0 &&
                   unconditional(&s->ip)) {
                        /* Tail of chains: STANDARD target (return/policy) */
                        /* Still in the base chain => policy; inside a
                         * user chain => implicit return. */
                        *comment = *chainname == hookname
                                ? comments[NF_IP_TRACE_COMMENT_POLICY]
                                : comments[NF_IP_TRACE_COMMENT_RETURN];
                }
                return 1;
        } else
                (*rulenum)++;

        return 0;
}
278
/* Emit a "TRACE: table:chain:comment:rulenum" log line for a packet
 * that matched rule 'e' with skb->nf_trace set.  Re-walks the hook's
 * chain to recover the chain name and rule number. */
static void trace_packet(const struct sk_buff *skb,
                         unsigned int hook,
                         const struct net_device *in,
                         const struct net_device *out,
                         const char *tablename,
                         const struct xt_table_info *private,
                         const struct ipt_entry *e)
{
        const void *table_base;
        const struct ipt_entry *root;
        const char *hookname, *chainname, *comment;
        const struct ipt_entry *iter;
        unsigned int rulenum = 0;

        /* Per-CPU copy of the ruleset; the caller (ipt_do_table) holds
         * the xt_info read lock so the blob is stable. */
        table_base = private->entries[smp_processor_id()];
        root = get_entry(table_base, private->hook_entry[hook]);

        hookname = chainname = hooknames[hook];
        comment = comments[NF_IP_TRACE_COMMENT_RULE];

        xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
                if (get_chainname_rulenum(iter, e, hookname,
                    &chainname, &comment, &rulenum) != 0)
                        break;

        nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
                      "TRACE: %s:%s:%s:%u ",
                      tablename, chainname, comment, rulenum);
}
308 #endif
309
310 static inline __pure
311 struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
312 {
313         return (void *)entry + entry->next_offset;
314 }
315
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/* Main per-packet rule traversal for one table at one hook.  Walks the
 * per-CPU copy of the ruleset, using a per-CPU jump stack to implement
 * jumps to user chains and RETURNs out of them. */
unsigned int
ipt_do_table(struct sk_buff *skb,
             unsigned int hook,
             const struct net_device *in,
             const struct net_device *out,
             struct xt_table *table)
{
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        const struct iphdr *ip;
        bool hotdrop = false;
        /* Initializing verdict to NF_DROP keeps gcc happy. */
        unsigned int verdict = NF_DROP;
        const char *indev, *outdev;
        const void *table_base;
        struct ipt_entry *e, **jumpstack;
        unsigned int *stackptr, origptr, cpu;
        const struct xt_table_info *private;
        struct xt_match_param mtpar;
        struct xt_target_param tgpar;

        /* Initialization */
        ip = ip_hdr(skb);
        indev = in ? in->name : nulldevname;
        outdev = out ? out->name : nulldevname;
        /* We handle fragments by dealing with the first fragment as
         * if it was a normal packet.  All other fragments are treated
         * normally, except that they will NEVER match rules that ask
         * things we don't know, ie. tcp syn flag or ports).  If the
         * rule is also a fragment-specific rule, non-fragments won't
         * match it. */
        mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
        mtpar.thoff   = ip_hdrlen(skb);
        mtpar.hotdrop = &hotdrop;
        mtpar.in      = tgpar.in  = in;
        mtpar.out     = tgpar.out = out;
        mtpar.family  = tgpar.family = NFPROTO_IPV4;
        mtpar.hooknum = tgpar.hooknum = hook;

        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
        /* Pin this CPU's view of the ruleset for the whole walk. */
        xt_info_rdlock_bh();
        private = table->private;
        cpu        = smp_processor_id();
        table_base = private->entries[cpu];
        /* Per-CPU stack recording where to resume after user chains. */
        jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
        stackptr   = &private->stackptr[cpu];
        origptr    = *stackptr;

        e = get_entry(table_base, private->hook_entry[hook]);

        pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
                 table->name, hook, origptr,
                 get_entry(table_base, private->underflow[hook]));

        do {
                const struct ipt_entry_target *t;
                const struct xt_entry_match *ematch;

                IP_NF_ASSERT(e);
                if (!ip_packet_match(ip, indev, outdev,
                    &e->ip, mtpar.fragoff)) {
 no_match:
                        e = ipt_next_entry(e);
                        continue;
                }

                /* Header matched; all match extensions must agree too. */
                xt_ematch_foreach(ematch, e)
                        if (do_match(ematch, skb, &mtpar) != 0)
                                goto no_match;

                ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

                t = ipt_get_target(e);
                IP_NF_ASSERT(t->u.kernel.target);

#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
                /* The packet is traced: log it */
                if (unlikely(skb->nf_trace))
                        trace_packet(skb, hook, in, out,
                                     table->name, private, e);
#endif
                /* Standard target? */
                if (!t->u.kernel.target->target) {
                        int v;

                        v = ((struct ipt_standard_target *)t)->verdict;
                        if (v < 0) {
                                /* Pop from stack? */
                                if (v != IPT_RETURN) {
                                        /* Absolute verdict, encoded as
                                         * -(NF_verdict) - 1. */
                                        verdict = (unsigned)(-v) - 1;
                                        break;
                                }
                                if (*stackptr == 0) {
                                        /* RETURN in a base chain: jump
                                         * to the hook's underflow
                                         * (policy) rule. */
                                        e = get_entry(table_base,
                                            private->underflow[hook]);
                                        pr_debug("Underflow (this is normal) "
                                                 "to %p\n", e);
                                } else {
                                        e = jumpstack[--*stackptr];
                                        pr_debug("Pulled %p out from pos %u\n",
                                                 e, *stackptr);
                                        e = ipt_next_entry(e);
                                }
                                continue;
                        }
                        /* v >= 0 is a jump offset.  Push a return
                         * address unless this is a tail-jump to the
                         * very next rule or an explicit goto. */
                        if (table_base + v != ipt_next_entry(e) &&
                            !(e->ip.flags & IPT_F_GOTO)) {
                                if (*stackptr >= private->stacksize) {
                                        verdict = NF_DROP;
                                        break;
                                }
                                jumpstack[(*stackptr)++] = e;
                                pr_debug("Pushed %p into pos %u\n",
                                         e, *stackptr - 1);
                        }

                        e = get_entry(table_base, v);
                        continue;
                }

                tgpar.target   = t->u.kernel.target;
                tgpar.targinfo = t->data;


                verdict = t->u.kernel.target->target(skb, &tgpar);
                /* Target might have changed stuff. */
                ip = ip_hdr(skb);
                if (verdict == IPT_CONTINUE)
                        e = ipt_next_entry(e);
                else
                        /* Verdict */
                        break;
        } while (!hotdrop);
        xt_info_rdunlock_bh();
        pr_debug("Exiting %s; resetting sp from %u to %u\n",
                 __func__, *stackptr, origptr);
        *stackptr = origptr;
#ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
#else
        if (hotdrop)
                return NF_DROP;
        else return verdict;
#endif
}
462
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
                   unsigned int valid_hooks, void *entry0)
{
        unsigned int hook;

        /* No recursion; use packet counter to save back ptrs (reset
           to 0 as we leave), and comefrom to save source hook bitmask */
        for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
                unsigned int pos = newinfo->hook_entry[hook];
                struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);

                if (!(valid_hooks & (1 << hook)))
                        continue;

                /* Set initial back pointer. */
                e->counters.pcnt = pos;

                for (;;) {
                        const struct ipt_standard_target *t
                                = (void *)ipt_get_target_c(e);
                        int visited = e->comefrom & (1 << hook);

                        /* Bit NF_INET_NUMHOOKS marks "currently on this
                         * walk's path"; meeting it again means a loop. */
                        if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
                                printk("iptables: loop hook %u pos %u %08X.\n",
                                       hook, pos, e->comefrom);
                                return 0;
                        }
                        e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));

                        /* Unconditional return/END. */
                        if ((e->target_offset == sizeof(struct ipt_entry) &&
                             (strcmp(t->target.u.user.name,
                                     IPT_STANDARD_TARGET) == 0) &&
                             t->verdict < 0 && unconditional(&e->ip)) ||
                            visited) {
                                unsigned int oldpos, size;

                                /* Negative verdicts must stay within
                                 * the known NF_* verdict range. */
                                if ((strcmp(t->target.u.user.name,
                                            IPT_STANDARD_TARGET) == 0) &&
                                    t->verdict < -NF_MAX_VERDICT - 1) {
                                        duprintf("mark_source_chains: bad "
                                                "negative verdict (%i)\n",
                                                                t->verdict);
                                        return 0;
                                }

                                /* Return: backtrack through the last
                                   big jump. */
                                do {
                                        e->comefrom ^= (1<<NF_INET_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
                                        if (e->comefrom
                                            & (1 << NF_INET_NUMHOOKS)) {
                                                duprintf("Back unset "
                                                         "on hook %u "
                                                         "rule %u\n",
                                                         hook, pos);
                                        }
#endif
                                        /* Follow the back-pointer chain
                                         * stored in counters.pcnt. */
                                        oldpos = pos;
                                        pos = e->counters.pcnt;
                                        e->counters.pcnt = 0;

                                        /* We're at the start. */
                                        if (pos == oldpos)
                                                goto next;

                                        e = (struct ipt_entry *)
                                                (entry0 + pos);
                                } while (oldpos == pos + e->next_offset);

                                /* Move along one */
                                size = e->next_offset;
                                e = (struct ipt_entry *)
                                        (entry0 + pos + size);
                                e->counters.pcnt = pos;
                                pos += size;
                        } else {
                                int newpos = t->verdict;

                                if (strcmp(t->target.u.user.name,
                                           IPT_STANDARD_TARGET) == 0 &&
                                    newpos >= 0) {
                                        /* Jump target must lie inside
                                         * the blob with room for at
                                         * least an entry header. */
                                        if (newpos > newinfo->size -
                                                sizeof(struct ipt_entry)) {
                                                duprintf("mark_source_chains: "
                                                        "bad verdict (%i)\n",
                                                                newpos);
                                                return 0;
                                        }
                                        /* This a jump; chase it. */
                                        duprintf("Jump rule %u -> %u\n",
                                                 pos, newpos);
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
                                }
                                e = (struct ipt_entry *)
                                        (entry0 + newpos);
                                e->counters.pcnt = pos;
                                pos = newpos;
                        }
                }
                next:
                duprintf("Finished chain %u\n", hook);
        }
        return 1;
}
574
575 static void cleanup_match(struct ipt_entry_match *m, struct net *net)
576 {
577         struct xt_mtdtor_param par;
578
579         par.net       = net;
580         par.match     = m->u.kernel.match;
581         par.matchinfo = m->data;
582         par.family    = NFPROTO_IPV4;
583         if (par.match->destroy != NULL)
584                 par.match->destroy(&par);
585         module_put(par.match->me);
586 }
587
588 static int
589 check_entry(const struct ipt_entry *e, const char *name)
590 {
591         const struct ipt_entry_target *t;
592
593         if (!ip_checkentry(&e->ip)) {
594                 duprintf("ip check failed %p %s.\n", e, par->match->name);
595                 return -EINVAL;
596         }
597
598         if (e->target_offset + sizeof(struct ipt_entry_target) >
599             e->next_offset)
600                 return -EINVAL;
601
602         t = ipt_get_target_c(e);
603         if (e->target_offset + t->u.target_size > e->next_offset)
604                 return -EINVAL;
605
606         return 0;
607 }
608
609 static int
610 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
611 {
612         const struct ipt_ip *ip = par->entryinfo;
613         int ret;
614
615         par->match     = m->u.kernel.match;
616         par->matchinfo = m->data;
617
618         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
619               ip->proto, ip->invflags & IPT_INV_PROTO);
620         if (ret < 0) {
621                 duprintf("check failed for `%s'.\n", par->match->name);
622                 return ret;
623         }
624         return 0;
625 }
626
627 static int
628 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
629 {
630         struct xt_match *match;
631         int ret;
632
633         match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
634                                       m->u.user.revision);
635         if (IS_ERR(match)) {
636                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
637                 return PTR_ERR(match);
638         }
639         m->u.kernel.match = match;
640
641         ret = check_match(m, par);
642         if (ret)
643                 goto err;
644
645         return 0;
646 err:
647         module_put(m->u.kernel.match->me);
648         return ret;
649 }
650
/* Validate the target extension's private data for rule 'e' of table
 * 'name'.  Returns 0 or the negative errno from xt_check_target(). */
static int check_target(struct ipt_entry *e, struct net *net, const char *name)
{
        struct ipt_entry_target *t = ipt_get_target(e);
        struct xt_tgchk_param par = {
                .net       = net,
                .table     = name,
                .entryinfo = e,
                .target    = t->u.kernel.target,
                .targinfo  = t->data,
                .hook_mask = e->comefrom,	/* hooks that can reach this rule */
                .family    = NFPROTO_IPV4,
        };
        int ret;

        ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
              e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
        if (ret < 0) {
                duprintf("check failed for `%s'.\n",
                         t->u.kernel.target->name);
                return ret;
        }
        return 0;
}
674
/* Fully validate one rule: layout checks, then look up and validate
 * every match extension and finally the target.  On any failure all
 * matches already set up are torn down again. */
static int
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
                 unsigned int size)
{
        struct ipt_entry_target *t;
        struct xt_target *target;
        int ret;
        unsigned int j;
        struct xt_mtchk_param mtpar;
        struct xt_entry_match *ematch;

        ret = check_entry(e, name);
        if (ret)
                return ret;

        /* j counts successfully checked matches so the cleanup loop
         * below only touches those. */
        j = 0;
        mtpar.net       = net;
        mtpar.table     = name;
        mtpar.entryinfo = &e->ip;
        mtpar.hook_mask = e->comefrom;
        mtpar.family    = NFPROTO_IPV4;
        xt_ematch_foreach(ematch, e) {
                ret = find_check_match(ematch, &mtpar);
                if (ret != 0)
                        goto cleanup_matches;
                ++j;
        }

        t = ipt_get_target(e);
        target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
                                        t->u.user.revision);
        if (IS_ERR(target)) {
                duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
                ret = PTR_ERR(target);
                goto cleanup_matches;
        }
        t->u.kernel.target = target;

        ret = check_target(e, net, name);
        if (ret)
                goto err;
        return 0;
 err:
        module_put(t->u.kernel.target->me);
 cleanup_matches:
        xt_ematch_foreach(ematch, e) {
                if (j-- == 0)
                        break;
                cleanup_match(ematch, net);
        }
        return ret;
}
727
728 static bool check_underflow(const struct ipt_entry *e)
729 {
730         const struct ipt_entry_target *t;
731         unsigned int verdict;
732
733         if (!unconditional(&e->ip))
734                 return false;
735         t = ipt_get_target_c(e);
736         if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
737                 return false;
738         verdict = ((struct ipt_standard_target *)t)->verdict;
739         verdict = -verdict - 1;
740         return verdict == NF_DROP || verdict == NF_ACCEPT;
741 }
742
743 static int
744 check_entry_size_and_hooks(struct ipt_entry *e,
745                            struct xt_table_info *newinfo,
746                            const unsigned char *base,
747                            const unsigned char *limit,
748                            const unsigned int *hook_entries,
749                            const unsigned int *underflows,
750                            unsigned int valid_hooks)
751 {
752         unsigned int h;
753
754         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
755             (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
756                 duprintf("Bad offset %p\n", e);
757                 return -EINVAL;
758         }
759
760         if (e->next_offset
761             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
762                 duprintf("checking: element %p size %u\n",
763                          e, e->next_offset);
764                 return -EINVAL;
765         }
766
767         /* Check hooks & underflows */
768         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
769                 if (!(valid_hooks & (1 << h)))
770                         continue;
771                 if ((unsigned char *)e - base == hook_entries[h])
772                         newinfo->hook_entry[h] = hook_entries[h];
773                 if ((unsigned char *)e - base == underflows[h]) {
774                         if (!check_underflow(e)) {
775                                 pr_err("Underflows must be unconditional and "
776                                        "use the STANDARD target with "
777                                        "ACCEPT/DROP\n");
778                                 return -EINVAL;
779                         }
780                         newinfo->underflow[h] = underflows[h];
781                 }
782         }
783
784         /* Clear counters and comefrom */
785         e->counters = ((struct xt_counters) { 0, 0 });
786         e->comefrom = 0;
787         return 0;
788 }
789
790 static void
791 cleanup_entry(struct ipt_entry *e, struct net *net)
792 {
793         struct xt_tgdtor_param par;
794         struct ipt_entry_target *t;
795         struct xt_entry_match *ematch;
796
797         /* Cleanup all matches */
798         xt_ematch_foreach(ematch, e)
799                 cleanup_match(ematch, net);
800         t = ipt_get_target(e);
801
802         par.net      = net;
803         par.target   = t->u.kernel.target;
804         par.targinfo = t->data;
805         par.family   = NFPROTO_IPV4;
806         if (par.target->destroy != NULL)
807                 par.target->destroy(&par);
808         module_put(par.target->me);
809 }
810
/* Checks and translates the user-supplied table segment (held in
 * newinfo).  Validates entry sizes, hook/underflow placement and jump
 * targets, runs every extension's checkentry hook, and finally clones
 * the verified ruleset to every other possible CPU.
 * Returns 0 on success or a negative errno; on failure all partially
 * initialized entries are cleaned up. */
static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
                const struct ipt_replace *repl)
{
        struct ipt_entry *iter;
        unsigned int i;
        int ret = 0;

        newinfo->size = repl->size;
        newinfo->number = repl->num_entries;

        /* Init all hooks to impossible value. */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                newinfo->hook_entry[i] = 0xFFFFFFFF;
                newinfo->underflow[i] = 0xFFFFFFFF;
        }

        duprintf("translate_table: size %u\n", newinfo->size);
        i = 0;
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter, entry0, newinfo->size) {
                ret = check_entry_size_and_hooks(iter, newinfo, entry0,
                                                 entry0 + repl->size,
                                                 repl->hook_entry,
                                                 repl->underflow,
                                                 repl->valid_hooks);
                if (ret != 0)
                        return ret;
                ++i;
                /* Each ERROR target marks a user-defined chain; the count
                 * sizes the per-cpu jump stack used at traversal time. */
                if (strcmp(ipt_get_target(iter)->u.user.name,
                    XT_ERROR_TARGET) == 0)
                        ++newinfo->stacksize;
        }

        /* Userspace must have declared the entry count it actually sent. */
        if (i != repl->num_entries) {
                duprintf("translate_table: %u not %u entries\n",
                         i, repl->num_entries);
                return -EINVAL;
        }

        /* Check hooks all assigned */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                /* Only hooks which are valid */
                if (!(repl->valid_hooks & (1 << i)))
                        continue;
                if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
                        duprintf("Invalid hook entry %u %u\n",
                                 i, repl->hook_entry[i]);
                        return -EINVAL;
                }
                if (newinfo->underflow[i] == 0xFFFFFFFF) {
                        duprintf("Invalid underflow %u %u\n",
                                 i, repl->underflow[i]);
                        return -EINVAL;
                }
        }

        /* Verify jump reachability and reject rule loops. */
        if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
                return -ELOOP;

        /* Finally, each sanity check must pass */
        i = 0;
        xt_entry_foreach(iter, entry0, newinfo->size) {
                ret = find_check_entry(iter, net, repl->name, repl->size);
                if (ret != 0)
                        break;
                ++i;
        }

        /* On failure undo only the i entries that passed find_check_entry. */
        if (ret != 0) {
                xt_entry_foreach(iter, entry0, newinfo->size) {
                        if (i-- == 0)
                                break;
                        cleanup_entry(iter, net);
                }
                return ret;
        }

        /* And one copy for every other CPU */
        for_each_possible_cpu(i) {
                if (newinfo->entries[i] && newinfo->entries[i] != entry0)
                        memcpy(newinfo->entries[i], entry0, newinfo->size);
        }

        return ret;
}
899
/* Snapshot the per-rule packet/byte counters of table @t into
 * @counters (one slot per rule, in rule order), summing the per-cpu
 * copies. */
static void
get_counters(const struct xt_table_info *t,
             struct xt_counters counters[])
{
        struct ipt_entry *iter;
        unsigned int cpu;
        unsigned int i;
        unsigned int curcpu;

        /* Instead of clearing (by a previous call to memset())
         * the counters and using adds, we set the counters
         * with data used by 'current' CPU.
         *
         * Bottom half has to be disabled to prevent deadlock
         * if new softirq were to run and call ipt_do_table
         */
        local_bh_disable();
        curcpu = smp_processor_id();

        i = 0;
        /* Seed each slot from the local CPU's copy; with BHs off this
         * CPU cannot concurrently update its own counters. */
        xt_entry_foreach(iter, t->entries[curcpu], t->size) {
                SET_COUNTER(counters[i], iter->counters.bcnt,
                            iter->counters.pcnt);
                ++i;
        }

        /* Accumulate every other CPU's copy under its write lock so the
         * remote CPU cannot modify counters mid-read. */
        for_each_possible_cpu(cpu) {
                if (cpu == curcpu)
                        continue;
                i = 0;
                xt_info_wrlock(cpu);
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
                        ADD_COUNTER(counters[i], iter->counters.bcnt,
                                    iter->counters.pcnt);
                        ++i; /* macro does multi eval of i */
                }
                xt_info_wrunlock(cpu);
        }
        local_bh_enable();
}
940
941 static struct xt_counters *alloc_counters(const struct xt_table *table)
942 {
943         unsigned int countersize;
944         struct xt_counters *counters;
945         const struct xt_table_info *private = table->private;
946
947         /* We need atomic snapshot of counters: rest doesn't change
948            (other than comefrom, which userspace doesn't care
949            about). */
950         countersize = sizeof(struct xt_counters) * private->number;
951         counters = vmalloc_node(countersize, numa_node_id());
952
953         if (counters == NULL)
954                 return ERR_PTR(-ENOMEM);
955
956         get_counters(private, counters);
957
958         return counters;
959 }
960
/* Copy the table's ruleset back to userspace: first the raw entries,
 * then per-entry fixups replacing kernel-internal data with what
 * userspace expects (counter snapshot, match/target names).
 * Returns 0 or -EFAULT. */
static int
copy_entries_to_user(unsigned int total_size,
                     const struct xt_table *table,
                     void __user *userptr)
{
        unsigned int off, num;
        const struct ipt_entry *e;
        struct xt_counters *counters;
        const struct xt_table_info *private = table->private;
        int ret = 0;
        const void *loc_cpu_entry;

        counters = alloc_counters(table);
        if (IS_ERR(counters))
                return PTR_ERR(counters);

        /* choose the copy that is on our node/cpu, ...
         * This choice is lazy (because current thread is
         * allowed to migrate to another cpu)
         */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
                ret = -EFAULT;
                goto free_counters;
        }

        /* FIXME: use iterator macros --RR */
        /* ... then go back and fix counters and names */
        for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
                unsigned int i;
                const struct ipt_entry_match *m;
                const struct ipt_entry_target *t;

                e = (struct ipt_entry *)(loc_cpu_entry + off);
                /* Overwrite the raw per-cpu counters with the snapshot
                 * taken by alloc_counters(). */
                if (copy_to_user(userptr + off
                                 + offsetof(struct ipt_entry, counters),
                                 &counters[num],
                                 sizeof(counters[num])) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }

                /* Patch each match's user-visible name over the kernel
                 * pointer that currently occupies the union. */
                for (i = sizeof(struct ipt_entry);
                     i < e->target_offset;
                     i += m->u.match_size) {
                        m = (void *)e + i;

                        if (copy_to_user(userptr + off + i
                                         + offsetof(struct ipt_entry_match,
                                                    u.user.name),
                                         m->u.kernel.match->name,
                                         strlen(m->u.kernel.match->name)+1)
                            != 0) {
                                ret = -EFAULT;
                                goto free_counters;
                        }
                }

                /* Same fixup for the entry's target. */
                t = ipt_get_target_c(e);
                if (copy_to_user(userptr + off + e->target_offset
                                 + offsetof(struct ipt_entry_target,
                                            u.user.name),
                                 t->u.kernel.target->name,
                                 strlen(t->u.kernel.target->name)+1) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }
        }

 free_counters:
        vfree(counters);
        return ret;
}
1034
1035 #ifdef CONFIG_COMPAT
1036 static void compat_standard_from_user(void *dst, const void *src)
1037 {
1038         int v = *(compat_int_t *)src;
1039
1040         if (v > 0)
1041                 v += xt_compat_calc_jump(AF_INET, v);
1042         memcpy(dst, &v, sizeof(v));
1043 }
1044
1045 static int compat_standard_to_user(void __user *dst, const void *src)
1046 {
1047         compat_int_t cv = *(int *)src;
1048
1049         if (cv > 0)
1050                 cv -= xt_compat_calc_jump(AF_INET, cv);
1051         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1052 }
1053
/* Compute how much entry @e shrinks when converted to the compat
 * layout, record the delta for later jump translation, and shift
 * newinfo's size and hook/underflow offsets accordingly. */
static int compat_calc_entry(const struct ipt_entry *e,
                             const struct xt_table_info *info,
                             const void *base, struct xt_table_info *newinfo)
{
        const struct xt_entry_match *ematch;
        const struct ipt_entry_target *t;
        unsigned int entry_offset;
        int off, i, ret;

        /* Size delta: entry header plus every match and the target. */
        off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
        entry_offset = (void *)e - base;
        xt_ematch_foreach(ematch, e)
                off += xt_compat_match_offset(ematch->u.kernel.match);
        t = ipt_get_target_c(e);
        off += xt_compat_target_offset(t->u.kernel.target);
        newinfo->size -= off;
        /* Remember this entry's delta for xt_compat_calc_jump(). */
        ret = xt_compat_add_offset(AF_INET, entry_offset, off);
        if (ret)
                return ret;

        /* Hook/underflow offsets that lie beyond this entry move down. */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                if (info->hook_entry[i] &&
                    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
                        newinfo->hook_entry[i] -= off;
                if (info->underflow[i] &&
                    (e < (struct ipt_entry *)(base + info->underflow[i])))
                        newinfo->underflow[i] -= off;
        }
        return 0;
}
1084
1085 static int compat_table_info(const struct xt_table_info *info,
1086                              struct xt_table_info *newinfo)
1087 {
1088         struct ipt_entry *iter;
1089         void *loc_cpu_entry;
1090         int ret;
1091
1092         if (!newinfo || !info)
1093                 return -EINVAL;
1094
1095         /* we dont care about newinfo->entries[] */
1096         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1097         newinfo->initial_entries = 0;
1098         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1099         xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1100                 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1101                 if (ret != 0)
1102                         return ret;
1103         }
1104         return 0;
1105 }
1106 #endif
1107
/* IPT_SO_GET_INFO handler: report a table's hook layout, entry count
 * and blob size to userspace; @compat selects the 32-bit ABI view. */
static int get_info(struct net *net, void __user *user,
                    const int *len, int compat)
{
        char name[IPT_TABLE_MAXNAMELEN];
        struct xt_table *t;
        int ret;

        if (*len != sizeof(struct ipt_getinfo)) {
                duprintf("length %u != %zu\n", *len,
                         sizeof(struct ipt_getinfo));
                return -EINVAL;
        }

        if (copy_from_user(name, user, sizeof(name)) != 0)
                return -EFAULT;

        /* Userspace need not NUL-terminate the name; force it. */
        name[IPT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
        if (compat)
                xt_compat_lock(AF_INET);
#endif
        /* Autoload the matching iptable_* module on first use. */
        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
                                    "iptable_%s", name);
        if (t && !IS_ERR(t)) {
                struct ipt_getinfo info;
                const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
                struct xt_table_info tmp;

                if (compat) {
                        /* NOTE(review): this ret is overwritten below; if
                         * compat_table_info() fails, tmp may be only
                         * partially converted — consider propagating the
                         * error instead. */
                        ret = compat_table_info(private, &tmp);
                        xt_compat_flush_offsets(AF_INET);
                        private = &tmp;
                }
#endif
                info.valid_hooks = t->valid_hooks;
                memcpy(info.hook_entry, private->hook_entry,
                       sizeof(info.hook_entry));
                memcpy(info.underflow, private->underflow,
                       sizeof(info.underflow));
                info.num_entries = private->number;
                info.size = private->size;
                strcpy(info.name, name);

                if (copy_to_user(user, &info, *len) != 0)
                        ret = -EFAULT;
                else
                        ret = 0;

                xt_table_unlock(t);
                module_put(t->me);
        } else
                ret = t ? PTR_ERR(t) : -ENOENT;
#ifdef CONFIG_COMPAT
        if (compat)
                xt_compat_unlock(AF_INET);
#endif
        return ret;
}
1167
1168 static int
1169 get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1170             const int *len)
1171 {
1172         int ret;
1173         struct ipt_get_entries get;
1174         struct xt_table *t;
1175
1176         if (*len < sizeof(get)) {
1177                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1178                 return -EINVAL;
1179         }
1180         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1181                 return -EFAULT;
1182         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1183                 duprintf("get_entries: %u != %zu\n",
1184                          *len, sizeof(get) + get.size);
1185                 return -EINVAL;
1186         }
1187
1188         t = xt_find_table_lock(net, AF_INET, get.name);
1189         if (t && !IS_ERR(t)) {
1190                 const struct xt_table_info *private = t->private;
1191                 duprintf("t->private->number = %u\n", private->number);
1192                 if (get.size == private->size)
1193                         ret = copy_entries_to_user(private->size,
1194                                                    t, uptr->entrytable);
1195                 else {
1196                         duprintf("get_entries: I've got %u not %u!\n",
1197                                  private->size, get.size);
1198                         ret = -EAGAIN;
1199                 }
1200                 module_put(t->me);
1201                 xt_table_unlock(t);
1202         } else
1203                 ret = t ? PTR_ERR(t) : -ENOENT;
1204
1205         return ret;
1206 }
1207
/* Swap @newinfo into the table named @name: returns the old ruleset's
 * final counters to userspace at @counters_ptr, then tears down and
 * frees the old ruleset.  Called with a fully translated newinfo. */
static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
             struct xt_table_info *newinfo, unsigned int num_counters,
             void __user *counters_ptr)
{
        int ret;
        struct xt_table *t;
        struct xt_table_info *oldinfo;
        struct xt_counters *counters;
        void *loc_cpu_old_entry;
        struct ipt_entry *iter;

        ret = 0;
        counters = vmalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
        }

        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
                                    "iptable_%s", name);
        if (!t || IS_ERR(t)) {
                ret = t ? PTR_ERR(t) : -ENOENT;
                goto free_newinfo_counters_untrans;
        }

        /* You lied! */
        if (valid_hooks != t->valid_hooks) {
                duprintf("Valid hook crap: %08X vs %08X\n",
                         valid_hooks, t->valid_hooks);
                ret = -EINVAL;
                goto put_module;
        }

        /* Atomically publish newinfo; oldinfo is the retired ruleset. */
        oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
        if (!oldinfo)
                goto put_module;

        /* Update module usage count based on number of rules */
        duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
                oldinfo->number, oldinfo->initial_entries, newinfo->number);
        /* NOTE(review): this pair of conditional puts presumably balances
         * the references taken while user rules beyond the built-in set
         * were loaded — confirm against the corresponding gets before
         * touching this accounting. */
        if ((oldinfo->number > oldinfo->initial_entries) ||
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);
        if ((oldinfo->number > oldinfo->initial_entries) &&
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);

        /* Get the old counters, and synchronize with replace */
        get_counters(oldinfo, counters);

        /* Decrease module usage counts and free resource */
        loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
        xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
                cleanup_entry(iter, net);

        xt_free_table_info(oldinfo);
        /* Replace already succeeded; a failed copy only loses counters. */
        if (copy_to_user(counters_ptr, counters,
                         sizeof(struct xt_counters) * num_counters) != 0)
                ret = -EFAULT;
        vfree(counters);
        xt_table_unlock(t);
        return ret;

 put_module:
        module_put(t->me);
        xt_table_unlock(t);
 free_newinfo_counters_untrans:
        vfree(counters);
 out:
        return ret;
}
1280
1281 static int
1282 do_replace(struct net *net, const void __user *user, unsigned int len)
1283 {
1284         int ret;
1285         struct ipt_replace tmp;
1286         struct xt_table_info *newinfo;
1287         void *loc_cpu_entry;
1288         struct ipt_entry *iter;
1289
1290         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1291                 return -EFAULT;
1292
1293         /* overflow check */
1294         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1295                 return -ENOMEM;
1296
1297         newinfo = xt_alloc_table_info(tmp.size);
1298         if (!newinfo)
1299                 return -ENOMEM;
1300
1301         /* choose the copy that is on our node/cpu */
1302         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1303         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1304                            tmp.size) != 0) {
1305                 ret = -EFAULT;
1306                 goto free_newinfo;
1307         }
1308
1309         ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
1310         if (ret != 0)
1311                 goto free_newinfo;
1312
1313         duprintf("Translated table\n");
1314
1315         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1316                            tmp.num_counters, tmp.counters);
1317         if (ret)
1318                 goto free_newinfo_untrans;
1319         return 0;
1320
1321  free_newinfo_untrans:
1322         xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1323                 cleanup_entry(iter, net);
1324  free_newinfo:
1325         xt_free_table_info(newinfo);
1326         return ret;
1327 }
1328
1329 static int
1330 do_add_counters(struct net *net, const void __user *user,
1331                 unsigned int len, int compat)
1332 {
1333         unsigned int i, curcpu;
1334         struct xt_counters_info tmp;
1335         struct xt_counters *paddc;
1336         unsigned int num_counters;
1337         const char *name;
1338         int size;
1339         void *ptmp;
1340         struct xt_table *t;
1341         const struct xt_table_info *private;
1342         int ret = 0;
1343         void *loc_cpu_entry;
1344         struct ipt_entry *iter;
1345 #ifdef CONFIG_COMPAT
1346         struct compat_xt_counters_info compat_tmp;
1347
1348         if (compat) {
1349                 ptmp = &compat_tmp;
1350                 size = sizeof(struct compat_xt_counters_info);
1351         } else
1352 #endif
1353         {
1354                 ptmp = &tmp;
1355                 size = sizeof(struct xt_counters_info);
1356         }
1357
1358         if (copy_from_user(ptmp, user, size) != 0)
1359                 return -EFAULT;
1360
1361 #ifdef CONFIG_COMPAT
1362         if (compat) {
1363                 num_counters = compat_tmp.num_counters;
1364                 name = compat_tmp.name;
1365         } else
1366 #endif
1367         {
1368                 num_counters = tmp.num_counters;
1369                 name = tmp.name;
1370         }
1371
1372         if (len != size + num_counters * sizeof(struct xt_counters))
1373                 return -EINVAL;
1374
1375         paddc = vmalloc_node(len - size, numa_node_id());
1376         if (!paddc)
1377                 return -ENOMEM;
1378
1379         if (copy_from_user(paddc, user + size, len - size) != 0) {
1380                 ret = -EFAULT;
1381                 goto free;
1382         }
1383
1384         t = xt_find_table_lock(net, AF_INET, name);
1385         if (!t || IS_ERR(t)) {
1386                 ret = t ? PTR_ERR(t) : -ENOENT;
1387                 goto free;
1388         }
1389
1390         local_bh_disable();
1391         private = t->private;
1392         if (private->number != num_counters) {
1393                 ret = -EINVAL;
1394                 goto unlock_up_free;
1395         }
1396
1397         i = 0;
1398         /* Choose the copy that is on our node */
1399         curcpu = smp_processor_id();
1400         loc_cpu_entry = private->entries[curcpu];
1401         xt_info_wrlock(curcpu);
1402         xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1403                 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1404                 ++i;
1405         }
1406         xt_info_wrunlock(curcpu);
1407  unlock_up_free:
1408         local_bh_enable();
1409         xt_table_unlock(t);
1410         module_put(t->me);
1411  free:
1412         vfree(paddc);
1413
1414         return ret;
1415 }
1416
1417 #ifdef CONFIG_COMPAT
/* 32-bit userspace view of struct ipt_replace.  Field order and sizes
 * must match the compat ABI exactly; the counters pointer is carried
 * as a compat_uptr_t instead of a native pointer. */
struct compat_ipt_replace {
        char                    name[IPT_TABLE_MAXNAMELEN];
        u32                     valid_hooks;
        u32                     num_entries;
        u32                     size;
        u32                     hook_entry[NF_INET_NUMHOOKS];
        u32                     underflow[NF_INET_NUMHOOKS];
        u32                     num_counters;
        compat_uptr_t           counters;       /* struct ipt_counters * */
        struct compat_ipt_entry entries[0];
};
1429
/* Convert one native entry @e to the compat layout at *dstptr,
 * substituting the counter snapshot slot @i, and rewrite its
 * target/next offsets by the bytes saved during conversion.
 * *dstptr and *size are advanced/shrunk as data is emitted. */
static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
                          unsigned int *size, struct xt_counters *counters,
                          unsigned int i)
{
        struct ipt_entry_target *t;
        struct compat_ipt_entry __user *ce;
        u_int16_t target_offset, next_offset;
        compat_uint_t origsize;
        const struct xt_entry_match *ematch;
        int ret = 0;

        origsize = *size;
        ce = (struct compat_ipt_entry __user *)*dstptr;
        if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
            copy_to_user(&ce->counters, &counters[i],
            sizeof(counters[i])) != 0)
                return -EFAULT;

        *dstptr += sizeof(struct compat_ipt_entry);
        *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

        /* Each match conversion may shrink *size further. */
        xt_ematch_foreach(ematch, e) {
                ret = xt_compat_match_to_user(ematch, dstptr, size);
                if (ret != 0)
                        return ret;
        }
        /* origsize - *size == bytes saved so far for this entry. */
        target_offset = e->target_offset - (origsize - *size);
        t = ipt_get_target(e);
        ret = xt_compat_target_to_user(t, dstptr, size);
        if (ret)
                return ret;
        next_offset = e->next_offset - (origsize - *size);
        if (put_user(target_offset, &ce->target_offset) != 0 ||
            put_user(next_offset, &ce->next_offset) != 0)
                return -EFAULT;
        return 0;
}
1468
1469 static int
1470 compat_find_calc_match(struct ipt_entry_match *m,
1471                        const char *name,
1472                        const struct ipt_ip *ip,
1473                        unsigned int hookmask,
1474                        int *size)
1475 {
1476         struct xt_match *match;
1477
1478         match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
1479                                       m->u.user.revision);
1480         if (IS_ERR(match)) {
1481                 duprintf("compat_check_calc_match: `%s' not found\n",
1482                          m->u.user.name);
1483                 return PTR_ERR(match);
1484         }
1485         m->u.kernel.match = match;
1486         *size += xt_compat_match_offset(match);
1487         return 0;
1488 }
1489
1490 static void compat_release_entry(struct compat_ipt_entry *e)
1491 {
1492         struct ipt_entry_target *t;
1493         struct xt_entry_match *ematch;
1494
1495         /* Cleanup all matches */
1496         xt_ematch_foreach(ematch, e)
1497                 module_put(ematch->u.kernel.match->me);
1498         t = compat_ipt_get_target(e);
1499         module_put(t->u.kernel.target->me);
1500 }
1501
/* Validate one compat entry's alignment, bounds and sizes, resolve
 * (and pin) its match/target modules, accumulate the compat->native
 * size delta into *size, and record hook/underflow placement.
 * On error, all module references taken so far are released. */
static int
check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
                                  struct xt_table_info *newinfo,
                                  unsigned int *size,
                                  const unsigned char *base,
                                  const unsigned char *limit,
                                  const unsigned int *hook_entries,
                                  const unsigned int *underflows,
                                  const char *name)
{
        struct xt_entry_match *ematch;
        struct ipt_entry_target *t;
        struct xt_target *target;
        unsigned int entry_offset;
        unsigned int j;
        int ret, off, h;

        duprintf("check_compat_entry_size_and_hooks %p\n", e);
        /* Entry must be aligned and leave room for at least a header
         * before the end of the blob. */
        if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
            (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
                duprintf("Bad offset %p, limit = %p\n", e, limit);
                return -EINVAL;
        }

        /* next_offset must cover at least header + minimal target. */
        if (e->next_offset < sizeof(struct compat_ipt_entry) +
                             sizeof(struct compat_xt_entry_target)) {
                duprintf("checking: element %p size %u\n",
                         e, e->next_offset);
                return -EINVAL;
        }

        /* For purposes of check_entry casting the compat entry is fine */
        ret = check_entry((struct ipt_entry *)e, name);
        if (ret)
                return ret;

        off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
        entry_offset = (void *)e - (void *)base;
        j = 0;
        /* j counts matches successfully resolved, for error unwinding. */
        xt_ematch_foreach(ematch, e) {
                ret = compat_find_calc_match(ematch, name,
                                             &e->ip, e->comefrom, &off);
                if (ret != 0)
                        goto release_matches;
                ++j;
        }

        t = compat_ipt_get_target(e);
        target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
                                        t->u.user.revision);
        if (IS_ERR(target)) {
                duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
                         t->u.user.name);
                ret = PTR_ERR(target);
                goto release_matches;
        }
        t->u.kernel.target = target;

        /* Record this entry's size delta for jump translation. */
        off += xt_compat_target_offset(target);
        *size += off;
        ret = xt_compat_add_offset(AF_INET, entry_offset, off);
        if (ret)
                goto out;

        /* Check hooks & underflows */
        for (h = 0; h < NF_INET_NUMHOOKS; h++) {
                if ((unsigned char *)e - base == hook_entries[h])
                        newinfo->hook_entry[h] = hook_entries[h];
                if ((unsigned char *)e - base == underflows[h])
                        newinfo->underflow[h] = underflows[h];
        }

        /* Clear counters and comefrom */
        memset(&e->counters, 0, sizeof(e->counters));
        e->comefrom = 0;
        return 0;

out:
        module_put(t->u.kernel.target->me);
release_matches:
        /* Undo only the j matches that were successfully resolved. */
        xt_ematch_foreach(ematch, e) {
                if (j-- == 0)
                        break;
                module_put(ematch->u.kernel.match->me);
        }
        return ret;
}
1589
/* Expand one compat entry @e into the native layout at *dstptr (whose
 * match/target modules were already resolved by
 * check_compat_entry_size_and_hooks), growing *size and shifting
 * newinfo's hook/underflow offsets for entries that moved. */
static int
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
                            unsigned int *size, const char *name,
                            struct xt_table_info *newinfo, unsigned char *base)
{
        struct ipt_entry_target *t;
        struct xt_target *target;
        struct ipt_entry *de;
        unsigned int origsize;
        int ret, h;
        struct xt_entry_match *ematch;

        ret = 0;
        origsize = *size;
        de = (struct ipt_entry *)*dstptr;
        memcpy(de, e, sizeof(struct ipt_entry));
        memcpy(&de->counters, &e->counters, sizeof(e->counters));

        *dstptr += sizeof(struct ipt_entry);
        *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

        /* Each match conversion advances *dstptr and grows *size. */
        xt_ematch_foreach(ematch, e) {
                ret = xt_compat_match_from_user(ematch, dstptr, size);
                if (ret != 0)
                        return ret;
        }
        /* origsize - *size is negative growth; offsets expand by it. */
        de->target_offset = e->target_offset - (origsize - *size);
        t = compat_ipt_get_target(e);
        target = t->u.kernel.target;
        xt_compat_target_from_user(t, dstptr, size);

        de->next_offset = e->next_offset - (origsize - *size);
        /* Hooks/underflows at or beyond this entry shift with it. */
        for (h = 0; h < NF_INET_NUMHOOKS; h++) {
                if ((unsigned char *)de - base < newinfo->hook_entry[h])
                        newinfo->hook_entry[h] -= origsize - *size;
                if ((unsigned char *)de - base < newinfo->underflow[h])
                        newinfo->underflow[h] -= origsize - *size;
        }
        return ret;
}
1630
/*
 * Second-pass check of a translated (native-layout) entry: run every
 * match's and the target's checkentry hooks.  On failure, undo exactly
 * the matches whose check already succeeded.
 */
static int
compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
{
	struct xt_entry_match *ematch;
	struct xt_mtchk_param mtpar;
	unsigned int j;
	int ret = 0;

	j = 0;	/* count of matches successfully checked so far */
	mtpar.net	= net;
	mtpar.table	= name;
	mtpar.entryinfo = &e->ip;
	mtpar.hook_mask = e->comefrom;
	mtpar.family	= NFPROTO_IPV4;
	xt_ematch_foreach(ematch, e) {
		ret = check_match(ematch, &mtpar);
		if (ret != 0)
			goto cleanup_matches;
		++j;
	}

	ret = check_target(e, net, name);
	if (ret)
		goto cleanup_matches;
	return 0;

 cleanup_matches:
	/* Release only the first j matches; the rest were never checked. */
	xt_ematch_foreach(ematch, e) {
		if (j-- == 0)
			break;
		cleanup_match(ematch, net);
	}
	return ret;
}
1665
/*
 * Translate a compat (32-bit userland) ruleset blob into a native
 * xt_table_info.
 *
 * Pass 1 (under xt_compat_lock): size/bounds-check every compat entry and
 * record hook offsets.  Pass 2: copy each entry into a freshly allocated
 * native-layout table, fixing up all offsets, then drop the compat offset
 * map and the lock.  Pass 3: mark chains for loop detection and run the
 * per-entry checkentry hooks.
 *
 * On success *pinfo/*pentry0 are replaced with the new table (the old
 * info is freed) and 0 is returned.  On failure all partially acquired
 * match/target references are released before returning.
 */
static int
translate_compat_table(struct net *net,
		       const char *name,
		       unsigned int valid_hooks,
		       struct xt_table_info **pinfo,
		       void **pentry0,
		       unsigned int total_size,
		       unsigned int number,
		       unsigned int *hook_entries,
		       unsigned int *underflows)
{
	unsigned int i, j;
	struct xt_table_info *newinfo, *info;
	void *pos, *entry0, *entry1;
	struct compat_ipt_entry *iter0;
	struct ipt_entry *iter1;
	unsigned int size;
	int ret;

	info = *pinfo;
	entry0 = *pentry0;
	size = total_size;
	info->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		info->hook_entry[i] = 0xFFFFFFFF;
		info->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_compat_table: size %u\n", info->size);
	j = 0;	/* entries that passed pass-1 (hold module refs) */
	xt_compat_lock(AF_INET);
	/* Walk through entries, checking offsets. */
	xt_entry_foreach(iter0, entry0, total_size) {
		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
							entry0,
							entry0 + total_size,
							hook_entries,
							underflows,
							name);
		if (ret != 0)
			goto out_unlock;
		++j;
	}

	ret = -EINVAL;
	if (j != number) {
		duprintf("translate_compat_table: %u not %u entries\n",
			 j, number);
		goto out_unlock;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (info->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			goto out_unlock;
		}
		if (info->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			goto out_unlock;
		}
	}

	ret = -ENOMEM;
	/* 'size' now reflects the (larger) native layout. */
	newinfo = xt_alloc_table_info(size);
	if (!newinfo)
		goto out_unlock;

	newinfo->number = number;
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = info->hook_entry[i];
		newinfo->underflow[i] = info->underflow[i];
	}
	entry1 = newinfo->entries[raw_smp_processor_id()];
	pos = entry1;
	size = total_size;
	xt_entry_foreach(iter0, entry0, total_size) {
		ret = compat_copy_entry_from_user(iter0, &pos, &size,
						  name, newinfo, entry1);
		if (ret != 0)
			break;
	}
	/* Compat offset map is no longer needed once copying is done. */
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);
	if (ret)
		goto free_newinfo;

	ret = -ELOOP;
	if (!mark_source_chains(newinfo, valid_hooks, entry1))
		goto free_newinfo;

	i = 0;	/* entries whose checkentry hooks succeeded */
	xt_entry_foreach(iter1, entry1, newinfo->size) {
		ret = compat_check_entry(iter1, net, name);
		if (ret != 0)
			break;
		++i;
	}
	if (ret) {
		/*
		 * The first i matches need cleanup_entry (calls ->destroy)
		 * because they had called ->check already. The other j-i
		 * entries need only release.
		 */
		int skip = i;
		j -= i;
		/* Walk bounded by the j-- break, not by the size argument. */
		xt_entry_foreach(iter0, entry0, newinfo->size) {
			if (skip-- > 0)
				continue;
			if (j-- == 0)
				break;
			compat_release_entry(iter0);
		}
		xt_entry_foreach(iter1, entry1, newinfo->size) {
			if (i-- == 0)
				break;
			cleanup_entry(iter1, net);
		}
		xt_free_table_info(newinfo);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_possible_cpu(i)
		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
			memcpy(newinfo->entries[i], entry1, newinfo->size);

	*pinfo = newinfo;
	*pentry0 = entry1;
	xt_free_table_info(info);
	return 0;

free_newinfo:
	xt_free_table_info(newinfo);
out:
	/* Drop the module references taken during pass 1. */
	xt_entry_foreach(iter0, entry0, total_size) {
		if (j-- == 0)
			break;
		compat_release_entry(iter0);
	}
	return ret;
out_unlock:
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);
	goto out;
}
1819
/*
 * IPT_SO_SET_REPLACE handler for 32-bit userland: copy in the compat
 * replace header and ruleset blob, translate it to native layout, then
 * swap it in via __do_replace().  Returns 0 or a negative errno.
 */
static int
compat_do_replace(struct net *net, void __user *user, unsigned int len)
{
	int ret;
	struct compat_ipt_replace tmp;
	struct xt_table_info *newinfo;
	void *loc_cpu_entry;
	struct ipt_entry *iter;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* overflow check */
	if (tmp.size >= INT_MAX / num_possible_cpus())
		return -ENOMEM;
	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
		return -ENOMEM;

	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy that is on our node/cpu */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	/* On success this replaces newinfo/loc_cpu_entry with the
	 * translated native table and frees the compat one. */
	ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
				     &newinfo, &loc_cpu_entry, tmp.size,
				     tmp.num_entries, tmp.hook_entry,
				     tmp.underflow);
	if (ret != 0)
		goto free_newinfo;

	duprintf("compat_do_replace: Translated table\n");

	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
			   tmp.num_counters, compat_ptr(tmp.counters));
	if (ret)
		goto free_newinfo_untrans;
	return 0;

 free_newinfo_untrans:
	/* Table was fully translated/checked: run full destructors. */
	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
		cleanup_entry(iter, net);
 free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
}
1872
1873 static int
1874 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1875                       unsigned int len)
1876 {
1877         int ret;
1878
1879         if (!capable(CAP_NET_ADMIN))
1880                 return -EPERM;
1881
1882         switch (cmd) {
1883         case IPT_SO_SET_REPLACE:
1884                 ret = compat_do_replace(sock_net(sk), user, len);
1885                 break;
1886
1887         case IPT_SO_SET_ADD_COUNTERS:
1888                 ret = do_add_counters(sock_net(sk), user, len, 1);
1889                 break;
1890
1891         default:
1892                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1893                 ret = -EINVAL;
1894         }
1895
1896         return ret;
1897 }
1898
/* 32-bit userland layout of the IPT_SO_GET_ENTRIES request/reply:
 * table name, total blob size, then the entries themselves. */
struct compat_ipt_get_entries {
	char name[IPT_TABLE_MAXNAMELEN];
	compat_uint_t size;
	struct compat_ipt_entry entrytable[0];	/* flexible trailing array */
};
1904
/*
 * Copy the table's ruleset to 32-bit userland at @userptr, converting
 * each native entry to compat layout and attaching the summed counters.
 * Returns 0 or a negative errno from the per-entry copy.
 */
static int
compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
			    void __user *userptr)
{
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;
	void __user *pos;
	unsigned int size;
	int ret = 0;
	const void *loc_cpu_entry;
	unsigned int i = 0;	/* entry index, selects counters[i] */
	struct ipt_entry *iter;

	counters = alloc_counters(table);	/* vmalloc'ed snapshot */
	if (IS_ERR(counters))
		return PTR_ERR(counters);

	/* choose the copy that is on our node/cpu, ...
	 * This choice is lazy (because current thread is
	 * allowed to migrate to another cpu)
	 */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	pos = userptr;
	size = total_size;
	xt_entry_foreach(iter, loc_cpu_entry, total_size) {
		ret = compat_copy_entry_to_user(iter, &pos,
						&size, counters, i++);
		if (ret != 0)
			break;
	}

	vfree(counters);
	return ret;
}
1939
/*
 * IPT_SO_GET_ENTRIES handler for 32-bit userland: validate the request
 * header, look the table up, and copy its entries out in compat layout.
 * Userspace must pass get.size equal to the table's compat size
 * (obtained via IPT_SO_GET_INFO), otherwise -EAGAIN is returned.
 */
static int
compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
		   int *len)
{
	int ret;
	struct compat_ipt_get_entries get;
	struct xt_table *t;

	if (*len < sizeof(get)) {
		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
		return -EINVAL;
	}

	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
		return -EFAULT;

	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
		duprintf("compat_get_entries: %u != %zu\n",
			 *len, sizeof(get) + get.size);
		return -EINVAL;
	}

	/* compat lock taken before the table lock; compat_table_info()
	 * populates the offset map flushed below. */
	xt_compat_lock(AF_INET);
	t = xt_find_table_lock(net, AF_INET, get.name);
	if (t && !IS_ERR(t)) {
		const struct xt_table_info *private = t->private;
		struct xt_table_info info;
		duprintf("t->private->number = %u\n", private->number);
		ret = compat_table_info(private, &info);
		if (!ret && get.size == info.size) {
			ret = compat_copy_entries_to_user(private->size,
							  t, uptr->entrytable);
		} else if (!ret) {
			duprintf("compat_get_entries: I've got %u not %u!\n",
				 private->size, get.size);
			ret = -EAGAIN;
		}
		xt_compat_flush_offsets(AF_INET);
		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	xt_compat_unlock(AF_INET);
	return ret;
}
1986
1987 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1988
1989 static int
1990 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1991 {
1992         int ret;
1993
1994         if (!capable(CAP_NET_ADMIN))
1995                 return -EPERM;
1996
1997         switch (cmd) {
1998         case IPT_SO_GET_INFO:
1999                 ret = get_info(sock_net(sk), user, len, 1);
2000                 break;
2001         case IPT_SO_GET_ENTRIES:
2002                 ret = compat_get_entries(sock_net(sk), user, len);
2003                 break;
2004         default:
2005                 ret = do_ipt_get_ctl(sk, cmd, user, len);
2006         }
2007         return ret;
2008 }
2009 #endif
2010
2011 static int
2012 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2013 {
2014         int ret;
2015
2016         if (!capable(CAP_NET_ADMIN))
2017                 return -EPERM;
2018
2019         switch (cmd) {
2020         case IPT_SO_SET_REPLACE:
2021                 ret = do_replace(sock_net(sk), user, len);
2022                 break;
2023
2024         case IPT_SO_SET_ADD_COUNTERS:
2025                 ret = do_add_counters(sock_net(sk), user, len, 0);
2026                 break;
2027
2028         default:
2029                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
2030                 ret = -EINVAL;
2031         }
2032
2033         return ret;
2034 }
2035
/*
 * Native getsockopt() dispatcher: table info, entry dump, and
 * match/target revision queries.  Requires CAP_NET_ADMIN.
 */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO:
		ret = get_info(sock_net(sk), user, len, 0);
		break;

	case IPT_SO_GET_ENTRIES:
		ret = get_entries(sock_net(sk), user, len);
		break;

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		int target;	/* 1 = target lookup, 0 = match lookup */

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			target = 1;
		else
			target = 0;

		/* Autoload "ipt_<name>" and retry if the extension is
		 * not yet registered. */
		try_then_request_module(xt_find_revision(AF_INET, rev.name,
							 rev.revision,
							 target, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}
2086
/*
 * Register an ip_tables table for @net from the initial ruleset @repl.
 * Allocates per-cpu table storage, translates/checks the entries, and
 * hands the result to xt_register_table().  Returns the registered
 * table or an ERR_PTR() on failure.
 */
struct xt_table *ipt_register_table(struct net *net,
				    const struct xt_table *table,
				    const struct ipt_replace *repl)
{
	int ret;
	struct xt_table_info *newinfo;
	struct xt_table_info bootstrap = {0};	/* placeholder swapped out */
	void *loc_cpu_entry;
	struct xt_table *new_table;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo) {
		ret = -ENOMEM;
		goto out;
	}

	/* choose the copy on our node/cpu, but dont care about preemption */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
	if (ret != 0)
		goto out_free;

	new_table = xt_register_table(net, table, &bootstrap, newinfo);
	if (IS_ERR(new_table)) {
		ret = PTR_ERR(new_table);
		goto out_free;
	}

	return new_table;

out_free:
	xt_free_table_info(newinfo);
out:
	return ERR_PTR(ret);
}
2124
/*
 * Tear down a table registered with ipt_register_table(): unregister it,
 * run cleanup_entry() on every rule (drops match/target module refs and
 * calls their destructors), then free the per-cpu table storage.
 */
void ipt_unregister_table(struct net *net, struct xt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;
	struct module *table_owner = table->me;
	struct ipt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter, net);
	/* Extra ref was taken when user rules were loaded on top of the
	 * built-in ones; drop it only in that case. */
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}
2142
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     bool invert)
{
	bool hit;

	/* 0xFF is the wildcard type: it matches any type/code pair. */
	if (test_type == 0xFF)
		hit = true;
	else
		hit = type == test_type &&
		      code >= min_code && code <= max_code;
	return hit ^ invert;
}
2153
/*
 * "icmp" match: compare the packet's ICMP type/code against the rule's
 * [type, code-range] (optionally inverted).  Non-first fragments never
 * match; truncated ICMP headers cause the packet to be hot-dropped.
 */
static bool
icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
{
	const struct icmphdr *ic;
	struct icmphdr _icmph;	/* on-stack copy for non-linear skbs */
	const struct ipt_icmp *icmpinfo = par->matchinfo;

	/* Must not be a fragment. */
	if (par->fragoff != 0)
		return false;

	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*par->hotdrop = true;
		return false;
	}

	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}
2181
2182 static int icmp_checkentry(const struct xt_mtchk_param *par)
2183 {
2184         const struct ipt_icmp *icmpinfo = par->matchinfo;
2185
2186         /* Must specify no unknown invflags */
2187         return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
2188 }
2189
/* The built-in targets: standard (NULL) and error. */
/* Standard target: a plain verdict (no .target function; the core
 * interprets the int payload as the verdict/jump offset). */
static struct xt_target ipt_standard_target __read_mostly = {
	.name		= IPT_STANDARD_TARGET,
	.targetsize	= sizeof(int),
	.family		= NFPROTO_IPV4,
#ifdef CONFIG_COMPAT
	/* 32/64-bit conversion of the verdict payload */
	.compatsize	= sizeof(compat_int_t),
	.compat_from_user = compat_standard_from_user,
	.compat_to_user	= compat_standard_to_user,
#endif
};
2201
/* Error target: marks the end of built-in chains / user-defined chain
 * heads; hitting it at runtime invokes ipt_error(). */
static struct xt_target ipt_error_target __read_mostly = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
	.targetsize	= IPT_FUNCTION_MAXNAMELEN,
	.family		= NFPROTO_IPV4,
};
2208
/* get/setsockopt registration: routes the IPT_SO_* option range on
 * PF_INET sockets to the dispatchers above (compat variants for 32-bit
 * userland on 64-bit kernels). */
static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
#ifdef CONFIG_COMPAT
	.compat_set	= compat_do_ipt_set_ctl,
#endif
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
#ifdef CONFIG_COMPAT
	.compat_get	= compat_do_ipt_get_ctl,
#endif
	.owner		= THIS_MODULE,
};
2225
/* Built-in "icmp" match (-p icmp --icmp-type ...), restricted to
 * IPPROTO_ICMP packets by .proto. */
static struct xt_match icmp_matchstruct __read_mostly = {
	.name		= "icmp",
	.match		= icmp_match,
	.matchsize	= sizeof(struct ipt_icmp),
	.checkentry	= icmp_checkentry,
	.proto		= IPPROTO_ICMP,
	.family		= NFPROTO_IPV4,
};
2234
/* Per-netns init: set up the IPv4 x_tables state (e.g. /proc entries). */
static int __net_init ip_tables_net_init(struct net *net)
{
	return xt_proto_init(net, NFPROTO_IPV4);
}
2239
/* Per-netns teardown: undo ip_tables_net_init(). */
static void __net_exit ip_tables_net_exit(struct net *net)
{
	xt_proto_fini(net, NFPROTO_IPV4);
}
2244
/* Hooks run for every network namespace as it is created/destroyed. */
static struct pernet_operations ip_tables_net_ops = {
	.init = ip_tables_net_init,
	.exit = ip_tables_net_exit,
};
2249
/*
 * Module init: register, in order, the pernet ops, the two built-in
 * targets, the icmp match, and finally the sockopt interface.  Each
 * failure unwinds everything registered so far via the goto ladder
 * (err5..err1, reverse order of registration).
 */
static int __init ip_tables_init(void)
{
	int ret;

	ret = register_pernet_subsys(&ip_tables_net_ops);
	if (ret < 0)
		goto err1;

	/* Noone else will be downing sem now, so we won't sleep */
	ret = xt_register_target(&ipt_standard_target);
	if (ret < 0)
		goto err2;
	ret = xt_register_target(&ipt_error_target);
	if (ret < 0)
		goto err3;
	ret = xt_register_match(&icmp_matchstruct);
	if (ret < 0)
		goto err4;

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0)
		goto err5;

	pr_info("(C) 2000-2006 Netfilter Core Team\n");
	return 0;

err5:
	xt_unregister_match(&icmp_matchstruct);
err4:
	xt_unregister_target(&ipt_error_target);
err3:
	xt_unregister_target(&ipt_standard_target);
err2:
	unregister_pernet_subsys(&ip_tables_net_ops);
err1:
	return ret;
}
2288
/* Module exit: unregister everything in reverse order of
 * ip_tables_init(). */
static void __exit ip_tables_fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);

	xt_unregister_match(&icmp_matchstruct);
	xt_unregister_target(&ipt_error_target);
	xt_unregister_target(&ipt_standard_target);

	unregister_pernet_subsys(&ip_tables_net_ops);
}
2299
2300 EXPORT_SYMBOL(ipt_register_table);
2301 EXPORT_SYMBOL(ipt_unregister_table);
2302 EXPORT_SYMBOL(ipt_do_table);
2303 module_init(ip_tables_init);
2304 module_exit(ip_tables_fini);