]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/netfilter/ip_tables.c
70900ecf88e2166d47466770b54a874f416831d7
[net-next-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 #include <linux/cache.h>
13 #include <linux/capability.h>
14 #include <linux/skbuff.h>
15 #include <linux/kmod.h>
16 #include <linux/vmalloc.h>
17 #include <linux/netdevice.h>
18 #include <linux/module.h>
19 #include <linux/icmp.h>
20 #include <net/ip.h>
21 #include <net/compat.h>
22 #include <asm/uaccess.h>
23 #include <linux/mutex.h>
24 #include <linux/proc_fs.h>
25 #include <linux/err.h>
26 #include <linux/cpumask.h>
27
28 #include <linux/netfilter/x_tables.h>
29 #include <linux/netfilter_ipv4/ip_tables.h>
30 #include <net/netfilter/nf_log.h>
31 #include "../../netfilter/xt_repldata.h"
32
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35 MODULE_DESCRIPTION("IPv4 packet filter");
36
37 /*#define DEBUG_IP_FIREWALL*/
38 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
39 /*#define DEBUG_IP_FIREWALL_USER*/
40
/*
 * Debug print helpers.  dprintf() is live only when DEBUG_IP_FIREWALL is
 * defined and duprintf() only when DEBUG_IP_FIREWALL_USER is defined;
 * in that case they forward to pr_info().  Otherwise they expand to
 * nothing, so their arguments are never evaluated.
 */
#ifndef DEBUG_IP_FIREWALL
#define dprintf(fmt, ...)
#else
#define dprintf(fmt, ...) pr_info(fmt , ## __VA_ARGS__)
#endif

#ifndef DEBUG_IP_FIREWALL_USER
#define duprintf(fmt, ...)
#else
#define duprintf(fmt, ...) pr_info(fmt , ## __VA_ARGS__)
#endif

/*
 * IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG set, print the function,
 * file and line when x is false.  Without it the macro (and x) vanish.
 */
#ifndef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)
#else
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __func__, __FILE__, __LINE__);		\
} while (0)
#endif

/* Flip to "#if 1" to strip static/inline so every symbol is debuggable. */
#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif
69
70 void *ipt_alloc_initial_table(const struct xt_table *info)
71 {
72         return xt_alloc_initial_table(ipt, IPT);
73 }
74 EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
75
76 /*
77    We keep a set of rules for each CPU, so we can avoid write-locking
78    them in the softirq when updating the counters and therefore
79    only need to read-lock in the softirq; doing a write_lock_bh() in user
80    context stops packets coming through and allows user context to read
81    the counters or update the rules.
82
83    Hence the start of any table is given by get_table() below.  */
84
85 /* Returns whether matches rule or not. */
86 /* Performance critical - called for every packet */
87 static inline bool
88 ip_packet_match(const struct iphdr *ip,
89                 const char *indev,
90                 const char *outdev,
91                 const struct ipt_ip *ipinfo,
92                 int isfrag)
93 {
94         unsigned long ret;
95
96 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
97
98         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
99                   IPT_INV_SRCIP) ||
100             FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
101                   IPT_INV_DSTIP)) {
102                 dprintf("Source or dest mismatch.\n");
103
104                 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
105                         &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
106                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
107                 dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
108                         &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
109                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
110                 return false;
111         }
112
113         ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
114
115         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
116                 dprintf("VIA in mismatch (%s vs %s).%s\n",
117                         indev, ipinfo->iniface,
118                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
119                 return false;
120         }
121
122         ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
123
124         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
125                 dprintf("VIA out mismatch (%s vs %s).%s\n",
126                         outdev, ipinfo->outiface,
127                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
128                 return false;
129         }
130
131         /* Check specific protocol */
132         if (ipinfo->proto &&
133             FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
134                 dprintf("Packet protocol %hi does not match %hi.%s\n",
135                         ip->protocol, ipinfo->proto,
136                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
137                 return false;
138         }
139
140         /* If we have a fragment rule but the packet is not a fragment
141          * then we return zero */
142         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
143                 dprintf("Fragment rule but not fragment.%s\n",
144                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
145                 return false;
146         }
147
148         return true;
149 }
150
151 static bool
152 ip_checkentry(const struct ipt_ip *ip)
153 {
154         if (ip->flags & ~IPT_F_MASK) {
155                 duprintf("Unknown flag bits set: %08X\n",
156                          ip->flags & ~IPT_F_MASK);
157                 return false;
158         }
159         if (ip->invflags & ~IPT_INV_MASK) {
160                 duprintf("Unknown invflag bits set: %08X\n",
161                          ip->invflags & ~IPT_INV_MASK);
162                 return false;
163         }
164         return true;
165 }
166
167 static unsigned int
168 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
169 {
170         if (net_ratelimit())
171                 pr_info("error: `%s'\n", (const char *)par->targinfo);
172
173         return NF_DROP;
174 }
175
176 /* Performance critical - called for every packet */
177 static inline bool
178 do_match(const struct ipt_entry_match *m, const struct sk_buff *skb,
179          struct xt_match_param *par)
180 {
181         par->match     = m->u.kernel.match;
182         par->matchinfo = m->data;
183
184         /* Stop iteration if it doesn't match */
185         if (!m->u.kernel.match->match(skb, par))
186                 return true;
187         else
188                 return false;
189 }
190
/*
 * Return the rule located "offset" bytes past "base".  No bounds or
 * alignment checking is done here.  Performance critical.
 */
static inline struct ipt_entry *
get_entry(const void *base, unsigned int offset)
{
	const char *start = base;

	return (struct ipt_entry *)(start + offset);
}
197
198 /* All zeroes == unconditional rule. */
199 /* Mildly perf critical (only if packet tracing is on) */
200 static inline bool unconditional(const struct ipt_ip *ip)
201 {
202         static const struct ipt_ip uncond;
203
204         return memcmp(ip, &uncond, sizeof(uncond)) == 0;
205 #undef FWINV
206 }
207
/*
 * Const-preserving wrapper around ipt_get_target(): the const is cast
 * away only for the call, and the result is handed back as const.
 */
static inline const struct ipt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
	struct ipt_entry *unqualified = (struct ipt_entry *)e;

	return ipt_get_target(unqualified);
}
214
215 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
216     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
217 static const char *const hooknames[] = {
218         [NF_INET_PRE_ROUTING]           = "PREROUTING",
219         [NF_INET_LOCAL_IN]              = "INPUT",
220         [NF_INET_FORWARD]               = "FORWARD",
221         [NF_INET_LOCAL_OUT]             = "OUTPUT",
222         [NF_INET_POST_ROUTING]          = "POSTROUTING",
223 };
224
225 enum nf_ip_trace_comments {
226         NF_IP_TRACE_COMMENT_RULE,
227         NF_IP_TRACE_COMMENT_RETURN,
228         NF_IP_TRACE_COMMENT_POLICY,
229 };
230
231 static const char *const comments[] = {
232         [NF_IP_TRACE_COMMENT_RULE]      = "rule",
233         [NF_IP_TRACE_COMMENT_RETURN]    = "return",
234         [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
235 };
236
237 static struct nf_loginfo trace_loginfo = {
238         .type = NF_LOG_TYPE_LOG,
239         .u = {
240                 .log = {
241                         .level = 4,
242                         .logflags = NF_LOG_MASK,
243                 },
244         },
245 };
246
247 /* Mildly perf critical (only if packet tracing is on) */
248 static inline int
249 get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
250                       const char *hookname, const char **chainname,
251                       const char **comment, unsigned int *rulenum)
252 {
253         const struct ipt_standard_target *t = (void *)ipt_get_target_c(s);
254
255         if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
256                 /* Head of user chain: ERROR target with chainname */
257                 *chainname = t->target.data;
258                 (*rulenum) = 0;
259         } else if (s == e) {
260                 (*rulenum)++;
261
262                 if (s->target_offset == sizeof(struct ipt_entry) &&
263                     strcmp(t->target.u.kernel.target->name,
264                            IPT_STANDARD_TARGET) == 0 &&
265                    t->verdict < 0 &&
266                    unconditional(&s->ip)) {
267                         /* Tail of chains: STANDARD target (return/policy) */
268                         *comment = *chainname == hookname
269                                 ? comments[NF_IP_TRACE_COMMENT_POLICY]
270                                 : comments[NF_IP_TRACE_COMMENT_RETURN];
271                 }
272                 return 1;
273         } else
274                 (*rulenum)++;
275
276         return 0;
277 }
278
279 static void trace_packet(const struct sk_buff *skb,
280                          unsigned int hook,
281                          const struct net_device *in,
282                          const struct net_device *out,
283                          const char *tablename,
284                          const struct xt_table_info *private,
285                          const struct ipt_entry *e)
286 {
287         const void *table_base;
288         const struct ipt_entry *root;
289         const char *hookname, *chainname, *comment;
290         const struct ipt_entry *iter;
291         unsigned int rulenum = 0;
292
293         table_base = private->entries[smp_processor_id()];
294         root = get_entry(table_base, private->hook_entry[hook]);
295
296         hookname = chainname = hooknames[hook];
297         comment = comments[NF_IP_TRACE_COMMENT_RULE];
298
299         xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
300                 if (get_chainname_rulenum(iter, e, hookname,
301                     &chainname, &comment, &rulenum) != 0)
302                         break;
303
304         nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
305                       "TRACE: %s:%s:%s:%u ",
306                       tablename, chainname, comment, rulenum);
307 }
308 #endif
309
310 static inline __pure
311 struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
312 {
313         return (void *)entry + entry->next_offset;
314 }
315
316 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
317 unsigned int
318 ipt_do_table(struct sk_buff *skb,
319              unsigned int hook,
320              const struct net_device *in,
321              const struct net_device *out,
322              struct xt_table *table)
323 {
324         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
325         const struct iphdr *ip;
326         bool hotdrop = false;
327         /* Initializing verdict to NF_DROP keeps gcc happy. */
328         unsigned int verdict = NF_DROP;
329         const char *indev, *outdev;
330         const void *table_base;
331         struct ipt_entry *e, **jumpstack;
332         unsigned int *stackptr, origptr, cpu;
333         const struct xt_table_info *private;
334         struct xt_match_param mtpar;
335         struct xt_target_param tgpar;
336
337         /* Initialization */
338         ip = ip_hdr(skb);
339         indev = in ? in->name : nulldevname;
340         outdev = out ? out->name : nulldevname;
341         /* We handle fragments by dealing with the first fragment as
342          * if it was a normal packet.  All other fragments are treated
343          * normally, except that they will NEVER match rules that ask
344          * things we don't know, ie. tcp syn flag or ports).  If the
345          * rule is also a fragment-specific rule, non-fragments won't
346          * match it. */
347         mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
348         mtpar.thoff   = ip_hdrlen(skb);
349         mtpar.hotdrop = &hotdrop;
350         mtpar.in      = tgpar.in  = in;
351         mtpar.out     = tgpar.out = out;
352         mtpar.family  = tgpar.family = NFPROTO_IPV4;
353         mtpar.hooknum = tgpar.hooknum = hook;
354
355         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
356         xt_info_rdlock_bh();
357         private = table->private;
358         cpu        = smp_processor_id();
359         table_base = private->entries[cpu];
360         jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
361         stackptr   = &private->stackptr[cpu];
362         origptr    = *stackptr;
363
364         e = get_entry(table_base, private->hook_entry[hook]);
365
366         pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
367                  table->name, hook, origptr,
368                  get_entry(table_base, private->underflow[hook]));
369
370         do {
371                 const struct ipt_entry_target *t;
372                 const struct xt_entry_match *ematch;
373
374                 IP_NF_ASSERT(e);
375                 if (!ip_packet_match(ip, indev, outdev,
376                     &e->ip, mtpar.fragoff)) {
377  no_match:
378                         e = ipt_next_entry(e);
379                         continue;
380                 }
381
382                 xt_ematch_foreach(ematch, e)
383                         if (do_match(ematch, skb, &mtpar) != 0)
384                                 goto no_match;
385
386                 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
387
388                 t = ipt_get_target(e);
389                 IP_NF_ASSERT(t->u.kernel.target);
390
391 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
392     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
393                 /* The packet is traced: log it */
394                 if (unlikely(skb->nf_trace))
395                         trace_packet(skb, hook, in, out,
396                                      table->name, private, e);
397 #endif
398                 /* Standard target? */
399                 if (!t->u.kernel.target->target) {
400                         int v;
401
402                         v = ((struct ipt_standard_target *)t)->verdict;
403                         if (v < 0) {
404                                 /* Pop from stack? */
405                                 if (v != IPT_RETURN) {
406                                         verdict = (unsigned)(-v) - 1;
407                                         break;
408                                 }
409                                 if (*stackptr == 0) {
410                                         e = get_entry(table_base,
411                                             private->underflow[hook]);
412                                         pr_devel("Underflow (this is normal) "
413                                                  "to %p\n", e);
414                                 } else {
415                                         e = jumpstack[--*stackptr];
416                                         pr_devel("Pulled %p out from pos %u\n",
417                                                  e, *stackptr);
418                                         e = ipt_next_entry(e);
419                                 }
420                                 continue;
421                         }
422                         if (table_base + v != ipt_next_entry(e) &&
423                             !(e->ip.flags & IPT_F_GOTO)) {
424                                 if (*stackptr >= private->stacksize) {
425                                         verdict = NF_DROP;
426                                         break;
427                                 }
428                                 jumpstack[(*stackptr)++] = e;
429                                 pr_devel("Pushed %p into pos %u\n",
430                                          e, *stackptr - 1);
431                         }
432
433                         e = get_entry(table_base, v);
434                         continue;
435                 }
436
437                 /* Targets which reenter must return
438                    abs. verdicts */
439                 tgpar.target   = t->u.kernel.target;
440                 tgpar.targinfo = t->data;
441
442
443                 verdict = t->u.kernel.target->target(skb, &tgpar);
444                 /* Target might have changed stuff. */
445                 ip = ip_hdr(skb);
446                 if (verdict == IPT_CONTINUE)
447                         e = ipt_next_entry(e);
448                 else
449                         /* Verdict */
450                         break;
451         } while (!hotdrop);
452         xt_info_rdunlock_bh();
453         pr_devel("Exiting %s; resetting sp from %u to %u\n",
454                  __func__, *stackptr, origptr);
455         *stackptr = origptr;
456 #ifdef DEBUG_ALLOW_ALL
457         return NF_ACCEPT;
458 #else
459         if (hotdrop)
460                 return NF_DROP;
461         else return verdict;
462 #endif
463 }
464
465 /* Figures out from what hook each rule can be called: returns 0 if
466    there are loops.  Puts hook bitmask in comefrom. */
467 static int
468 mark_source_chains(const struct xt_table_info *newinfo,
469                    unsigned int valid_hooks, void *entry0)
470 {
471         unsigned int hook;
472
473         /* No recursion; use packet counter to save back ptrs (reset
474            to 0 as we leave), and comefrom to save source hook bitmask */
475         for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
476                 unsigned int pos = newinfo->hook_entry[hook];
477                 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
478
479                 if (!(valid_hooks & (1 << hook)))
480                         continue;
481
482                 /* Set initial back pointer. */
483                 e->counters.pcnt = pos;
484
485                 for (;;) {
486                         const struct ipt_standard_target *t
487                                 = (void *)ipt_get_target_c(e);
488                         int visited = e->comefrom & (1 << hook);
489
490                         if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
491                                 printk("iptables: loop hook %u pos %u %08X.\n",
492                                        hook, pos, e->comefrom);
493                                 return 0;
494                         }
495                         e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
496
497                         /* Unconditional return/END. */
498                         if ((e->target_offset == sizeof(struct ipt_entry) &&
499                              (strcmp(t->target.u.user.name,
500                                      IPT_STANDARD_TARGET) == 0) &&
501                              t->verdict < 0 && unconditional(&e->ip)) ||
502                             visited) {
503                                 unsigned int oldpos, size;
504
505                                 if ((strcmp(t->target.u.user.name,
506                                             IPT_STANDARD_TARGET) == 0) &&
507                                     t->verdict < -NF_MAX_VERDICT - 1) {
508                                         duprintf("mark_source_chains: bad "
509                                                 "negative verdict (%i)\n",
510                                                                 t->verdict);
511                                         return 0;
512                                 }
513
514                                 /* Return: backtrack through the last
515                                    big jump. */
516                                 do {
517                                         e->comefrom ^= (1<<NF_INET_NUMHOOKS);
518 #ifdef DEBUG_IP_FIREWALL_USER
519                                         if (e->comefrom
520                                             & (1 << NF_INET_NUMHOOKS)) {
521                                                 duprintf("Back unset "
522                                                          "on hook %u "
523                                                          "rule %u\n",
524                                                          hook, pos);
525                                         }
526 #endif
527                                         oldpos = pos;
528                                         pos = e->counters.pcnt;
529                                         e->counters.pcnt = 0;
530
531                                         /* We're at the start. */
532                                         if (pos == oldpos)
533                                                 goto next;
534
535                                         e = (struct ipt_entry *)
536                                                 (entry0 + pos);
537                                 } while (oldpos == pos + e->next_offset);
538
539                                 /* Move along one */
540                                 size = e->next_offset;
541                                 e = (struct ipt_entry *)
542                                         (entry0 + pos + size);
543                                 e->counters.pcnt = pos;
544                                 pos += size;
545                         } else {
546                                 int newpos = t->verdict;
547
548                                 if (strcmp(t->target.u.user.name,
549                                            IPT_STANDARD_TARGET) == 0 &&
550                                     newpos >= 0) {
551                                         if (newpos > newinfo->size -
552                                                 sizeof(struct ipt_entry)) {
553                                                 duprintf("mark_source_chains: "
554                                                         "bad verdict (%i)\n",
555                                                                 newpos);
556                                                 return 0;
557                                         }
558                                         /* This a jump; chase it. */
559                                         duprintf("Jump rule %u -> %u\n",
560                                                  pos, newpos);
561                                 } else {
562                                         /* ... this is a fallthru */
563                                         newpos = pos + e->next_offset;
564                                 }
565                                 e = (struct ipt_entry *)
566                                         (entry0 + newpos);
567                                 e->counters.pcnt = pos;
568                                 pos = newpos;
569                         }
570                 }
571                 next:
572                 duprintf("Finished chain %u\n", hook);
573         }
574         return 1;
575 }
576
577 static void cleanup_match(struct ipt_entry_match *m, struct net *net)
578 {
579         struct xt_mtdtor_param par;
580
581         par.net       = net;
582         par.match     = m->u.kernel.match;
583         par.matchinfo = m->data;
584         par.family    = NFPROTO_IPV4;
585         if (par.match->destroy != NULL)
586                 par.match->destroy(&par);
587         module_put(par.match->me);
588 }
589
590 static int
591 check_entry(const struct ipt_entry *e, const char *name)
592 {
593         const struct ipt_entry_target *t;
594
595         if (!ip_checkentry(&e->ip)) {
596                 duprintf("ip check failed %p %s.\n", e, name);
597                 return -EINVAL;
598         }
599
600         if (e->target_offset + sizeof(struct ipt_entry_target) >
601             e->next_offset)
602                 return -EINVAL;
603
604         t = ipt_get_target_c(e);
605         if (e->target_offset + t->u.target_size > e->next_offset)
606                 return -EINVAL;
607
608         return 0;
609 }
610
611 static int
612 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
613 {
614         const struct ipt_ip *ip = par->entryinfo;
615         int ret;
616
617         par->match     = m->u.kernel.match;
618         par->matchinfo = m->data;
619
620         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
621               ip->proto, ip->invflags & IPT_INV_PROTO);
622         if (ret < 0) {
623                 duprintf("check failed for `%s'.\n", par.match->name);
624                 return ret;
625         }
626         return 0;
627 }
628
629 static int
630 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
631 {
632         struct xt_match *match;
633         int ret;
634
635         match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
636                                       m->u.user.revision);
637         if (IS_ERR(match)) {
638                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
639                 return PTR_ERR(match);
640         }
641         m->u.kernel.match = match;
642
643         ret = check_match(m, par);
644         if (ret)
645                 goto err;
646
647         return 0;
648 err:
649         module_put(m->u.kernel.match->me);
650         return ret;
651 }
652
653 static int check_target(struct ipt_entry *e, struct net *net, const char *name)
654 {
655         struct ipt_entry_target *t = ipt_get_target(e);
656         struct xt_tgchk_param par = {
657                 .net       = net,
658                 .table     = name,
659                 .entryinfo = e,
660                 .target    = t->u.kernel.target,
661                 .targinfo  = t->data,
662                 .hook_mask = e->comefrom,
663                 .family    = NFPROTO_IPV4,
664         };
665         int ret;
666
667         ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
668               e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
669         if (ret < 0) {
670                 duprintf("check failed for `%s'.\n",
671                          t->u.kernel.target->name);
672                 return ret;
673         }
674         return 0;
675 }
676
677 static int
678 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
679                  unsigned int size)
680 {
681         struct ipt_entry_target *t;
682         struct xt_target *target;
683         int ret;
684         unsigned int j;
685         struct xt_mtchk_param mtpar;
686         struct xt_entry_match *ematch;
687
688         ret = check_entry(e, name);
689         if (ret)
690                 return ret;
691
692         j = 0;
693         mtpar.net       = net;
694         mtpar.table     = name;
695         mtpar.entryinfo = &e->ip;
696         mtpar.hook_mask = e->comefrom;
697         mtpar.family    = NFPROTO_IPV4;
698         xt_ematch_foreach(ematch, e) {
699                 ret = find_check_match(ematch, &mtpar);
700                 if (ret != 0)
701                         goto cleanup_matches;
702                 ++j;
703         }
704
705         t = ipt_get_target(e);
706         target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
707                                         t->u.user.revision);
708         if (IS_ERR(target)) {
709                 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
710                 ret = PTR_ERR(target);
711                 goto cleanup_matches;
712         }
713         t->u.kernel.target = target;
714
715         ret = check_target(e, net, name);
716         if (ret)
717                 goto err;
718         return 0;
719  err:
720         module_put(t->u.kernel.target->me);
721  cleanup_matches:
722         xt_ematch_foreach(ematch, e) {
723                 if (j-- == 0)
724                         break;
725                 cleanup_match(ematch, net);
726         }
727         return ret;
728 }
729
730 static bool check_underflow(const struct ipt_entry *e)
731 {
732         const struct ipt_entry_target *t;
733         unsigned int verdict;
734
735         if (!unconditional(&e->ip))
736                 return false;
737         t = ipt_get_target_c(e);
738         if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
739                 return false;
740         verdict = ((struct ipt_standard_target *)t)->verdict;
741         verdict = -verdict - 1;
742         return verdict == NF_DROP || verdict == NF_ACCEPT;
743 }
744
745 static int
746 check_entry_size_and_hooks(struct ipt_entry *e,
747                            struct xt_table_info *newinfo,
748                            const unsigned char *base,
749                            const unsigned char *limit,
750                            const unsigned int *hook_entries,
751                            const unsigned int *underflows,
752                            unsigned int valid_hooks)
753 {
754         unsigned int h;
755
756         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
757             (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
758                 duprintf("Bad offset %p\n", e);
759                 return -EINVAL;
760         }
761
762         if (e->next_offset
763             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
764                 duprintf("checking: element %p size %u\n",
765                          e, e->next_offset);
766                 return -EINVAL;
767         }
768
769         /* Check hooks & underflows */
770         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
771                 if (!(valid_hooks & (1 << h)))
772                         continue;
773                 if ((unsigned char *)e - base == hook_entries[h])
774                         newinfo->hook_entry[h] = hook_entries[h];
775                 if ((unsigned char *)e - base == underflows[h]) {
776                         if (!check_underflow(e)) {
777                                 pr_err("Underflows must be unconditional and "
778                                        "use the STANDARD target with "
779                                        "ACCEPT/DROP\n");
780                                 return -EINVAL;
781                         }
782                         newinfo->underflow[h] = underflows[h];
783                 }
784         }
785
786         /* Clear counters and comefrom */
787         e->counters = ((struct xt_counters) { 0, 0 });
788         e->comefrom = 0;
789         return 0;
790 }
791
792 static void
793 cleanup_entry(struct ipt_entry *e, struct net *net)
794 {
795         struct xt_tgdtor_param par;
796         struct ipt_entry_target *t;
797         struct xt_entry_match *ematch;
798
799         /* Cleanup all matches */
800         xt_ematch_foreach(ematch, e)
801                 cleanup_match(ematch, net);
802         t = ipt_get_target(e);
803
804         par.net      = net;
805         par.target   = t->u.kernel.target;
806         par.targinfo = t->data;
807         par.family   = NFPROTO_IPV4;
808         if (par.target->destroy != NULL)
809                 par.target->destroy(&par);
810         module_put(par.target->me);
811 }
812
813 /* Checks and translates the user-supplied table segment (held in
814    newinfo) */
815 static int
816 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
817                 const struct ipt_replace *repl)
818 {
819         struct ipt_entry *iter;
820         unsigned int i;
821         int ret = 0;
822
823         newinfo->size = repl->size;
824         newinfo->number = repl->num_entries;
825
826         /* Init all hooks to impossible value. */
827         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
828                 newinfo->hook_entry[i] = 0xFFFFFFFF;
829                 newinfo->underflow[i] = 0xFFFFFFFF;
830         }
831
832         duprintf("translate_table: size %u\n", newinfo->size);
833         i = 0;
834         /* Walk through entries, checking offsets. */
835         xt_entry_foreach(iter, entry0, newinfo->size) {
836                 ret = check_entry_size_and_hooks(iter, newinfo, entry0,
837                                                  entry0 + repl->size,
838                                                  repl->hook_entry,
839                                                  repl->underflow,
840                                                  repl->valid_hooks);
841                 if (ret != 0)
842                         return ret;
843                 ++i;
844                 if (strcmp(ipt_get_target(iter)->u.user.name,
845                     XT_ERROR_TARGET) == 0)
846                         ++newinfo->stacksize;
847         }
848
849         if (i != repl->num_entries) {
850                 duprintf("translate_table: %u not %u entries\n",
851                          i, repl->num_entries);
852                 return -EINVAL;
853         }
854
855         /* Check hooks all assigned */
856         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
857                 /* Only hooks which are valid */
858                 if (!(repl->valid_hooks & (1 << i)))
859                         continue;
860                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
861                         duprintf("Invalid hook entry %u %u\n",
862                                  i, repl->hook_entry[i]);
863                         return -EINVAL;
864                 }
865                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
866                         duprintf("Invalid underflow %u %u\n",
867                                  i, repl->underflow[i]);
868                         return -EINVAL;
869                 }
870         }
871
872         if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
873                 return -ELOOP;
874
875         /* Finally, each sanity check must pass */
876         i = 0;
877         xt_entry_foreach(iter, entry0, newinfo->size) {
878                 ret = find_check_entry(iter, net, repl->name, repl->size);
879                 if (ret != 0)
880                         break;
881                 ++i;
882         }
883
884         if (ret != 0) {
885                 xt_entry_foreach(iter, entry0, newinfo->size) {
886                         if (i-- == 0)
887                                 break;
888                         cleanup_entry(iter, net);
889                 }
890                 return ret;
891         }
892
893         /* And one copy for every other CPU */
894         for_each_possible_cpu(i) {
895                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
896                         memcpy(newinfo->entries[i], entry0, newinfo->size);
897         }
898
899         return ret;
900 }
901
902 static void
903 get_counters(const struct xt_table_info *t,
904              struct xt_counters counters[])
905 {
906         struct ipt_entry *iter;
907         unsigned int cpu;
908         unsigned int i;
909         unsigned int curcpu;
910
911         /* Instead of clearing (by a previous call to memset())
912          * the counters and using adds, we set the counters
913          * with data used by 'current' CPU.
914          *
915          * Bottom half has to be disabled to prevent deadlock
916          * if new softirq were to run and call ipt_do_table
917          */
918         local_bh_disable();
919         curcpu = smp_processor_id();
920
921         i = 0;
922         xt_entry_foreach(iter, t->entries[curcpu], t->size) {
923                 SET_COUNTER(counters[i], iter->counters.bcnt,
924                             iter->counters.pcnt);
925                 ++i;
926         }
927
928         for_each_possible_cpu(cpu) {
929                 if (cpu == curcpu)
930                         continue;
931                 i = 0;
932                 xt_info_wrlock(cpu);
933                 xt_entry_foreach(iter, t->entries[cpu], t->size) {
934                         ADD_COUNTER(counters[i], iter->counters.bcnt,
935                                     iter->counters.pcnt);
936                         ++i; /* macro does multi eval of i */
937                 }
938                 xt_info_wrunlock(cpu);
939         }
940         local_bh_enable();
941 }
942
943 static struct xt_counters *alloc_counters(const struct xt_table *table)
944 {
945         unsigned int countersize;
946         struct xt_counters *counters;
947         const struct xt_table_info *private = table->private;
948
949         /* We need atomic snapshot of counters: rest doesn't change
950            (other than comefrom, which userspace doesn't care
951            about). */
952         countersize = sizeof(struct xt_counters) * private->number;
953         counters = vmalloc_node(countersize, numa_node_id());
954
955         if (counters == NULL)
956                 return ERR_PTR(-ENOMEM);
957
958         get_counters(private, counters);
959
960         return counters;
961 }
962
963 static int
964 copy_entries_to_user(unsigned int total_size,
965                      const struct xt_table *table,
966                      void __user *userptr)
967 {
968         unsigned int off, num;
969         const struct ipt_entry *e;
970         struct xt_counters *counters;
971         const struct xt_table_info *private = table->private;
972         int ret = 0;
973         const void *loc_cpu_entry;
974
975         counters = alloc_counters(table);
976         if (IS_ERR(counters))
977                 return PTR_ERR(counters);
978
979         /* choose the copy that is on our node/cpu, ...
980          * This choice is lazy (because current thread is
981          * allowed to migrate to another cpu)
982          */
983         loc_cpu_entry = private->entries[raw_smp_processor_id()];
984         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
985                 ret = -EFAULT;
986                 goto free_counters;
987         }
988
989         /* FIXME: use iterator macros --RR */
990         /* ... then go back and fix counters and names */
991         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
992                 unsigned int i;
993                 const struct ipt_entry_match *m;
994                 const struct ipt_entry_target *t;
995
996                 e = (struct ipt_entry *)(loc_cpu_entry + off);
997                 if (copy_to_user(userptr + off
998                                  + offsetof(struct ipt_entry, counters),
999                                  &counters[num],
1000                                  sizeof(counters[num])) != 0) {
1001                         ret = -EFAULT;
1002                         goto free_counters;
1003                 }
1004
1005                 for (i = sizeof(struct ipt_entry);
1006                      i < e->target_offset;
1007                      i += m->u.match_size) {
1008                         m = (void *)e + i;
1009
1010                         if (copy_to_user(userptr + off + i
1011                                          + offsetof(struct ipt_entry_match,
1012                                                     u.user.name),
1013                                          m->u.kernel.match->name,
1014                                          strlen(m->u.kernel.match->name)+1)
1015                             != 0) {
1016                                 ret = -EFAULT;
1017                                 goto free_counters;
1018                         }
1019                 }
1020
1021                 t = ipt_get_target_c(e);
1022                 if (copy_to_user(userptr + off + e->target_offset
1023                                  + offsetof(struct ipt_entry_target,
1024                                             u.user.name),
1025                                  t->u.kernel.target->name,
1026                                  strlen(t->u.kernel.target->name)+1) != 0) {
1027                         ret = -EFAULT;
1028                         goto free_counters;
1029                 }
1030         }
1031
1032  free_counters:
1033         vfree(counters);
1034         return ret;
1035 }
1036
1037 #ifdef CONFIG_COMPAT
1038 static void compat_standard_from_user(void *dst, const void *src)
1039 {
1040         int v = *(compat_int_t *)src;
1041
1042         if (v > 0)
1043                 v += xt_compat_calc_jump(AF_INET, v);
1044         memcpy(dst, &v, sizeof(v));
1045 }
1046
1047 static int compat_standard_to_user(void __user *dst, const void *src)
1048 {
1049         compat_int_t cv = *(int *)src;
1050
1051         if (cv > 0)
1052                 cv -= xt_compat_calc_jump(AF_INET, cv);
1053         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1054 }
1055
1056 static int compat_calc_entry(const struct ipt_entry *e,
1057                              const struct xt_table_info *info,
1058                              const void *base, struct xt_table_info *newinfo)
1059 {
1060         const struct xt_entry_match *ematch;
1061         const struct ipt_entry_target *t;
1062         unsigned int entry_offset;
1063         int off, i, ret;
1064
1065         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1066         entry_offset = (void *)e - base;
1067         xt_ematch_foreach(ematch, e)
1068                 off += xt_compat_match_offset(ematch->u.kernel.match);
1069         t = ipt_get_target_c(e);
1070         off += xt_compat_target_offset(t->u.kernel.target);
1071         newinfo->size -= off;
1072         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1073         if (ret)
1074                 return ret;
1075
1076         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1077                 if (info->hook_entry[i] &&
1078                     (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1079                         newinfo->hook_entry[i] -= off;
1080                 if (info->underflow[i] &&
1081                     (e < (struct ipt_entry *)(base + info->underflow[i])))
1082                         newinfo->underflow[i] -= off;
1083         }
1084         return 0;
1085 }
1086
1087 static int compat_table_info(const struct xt_table_info *info,
1088                              struct xt_table_info *newinfo)
1089 {
1090         struct ipt_entry *iter;
1091         void *loc_cpu_entry;
1092         int ret;
1093
1094         if (!newinfo || !info)
1095                 return -EINVAL;
1096
1097         /* we dont care about newinfo->entries[] */
1098         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1099         newinfo->initial_entries = 0;
1100         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1101         xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1102                 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1103                 if (ret != 0)
1104                         return ret;
1105         }
1106         return 0;
1107 }
1108 #endif
1109
1110 static int get_info(struct net *net, void __user *user,
1111                     const int *len, int compat)
1112 {
1113         char name[IPT_TABLE_MAXNAMELEN];
1114         struct xt_table *t;
1115         int ret;
1116
1117         if (*len != sizeof(struct ipt_getinfo)) {
1118                 duprintf("length %u != %zu\n", *len,
1119                          sizeof(struct ipt_getinfo));
1120                 return -EINVAL;
1121         }
1122
1123         if (copy_from_user(name, user, sizeof(name)) != 0)
1124                 return -EFAULT;
1125
1126         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1127 #ifdef CONFIG_COMPAT
1128         if (compat)
1129                 xt_compat_lock(AF_INET);
1130 #endif
1131         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1132                                     "iptable_%s", name);
1133         if (t && !IS_ERR(t)) {
1134                 struct ipt_getinfo info;
1135                 const struct xt_table_info *private = t->private;
1136 #ifdef CONFIG_COMPAT
1137                 struct xt_table_info tmp;
1138
1139                 if (compat) {
1140                         ret = compat_table_info(private, &tmp);
1141                         xt_compat_flush_offsets(AF_INET);
1142                         private = &tmp;
1143                 }
1144 #endif
1145                 info.valid_hooks = t->valid_hooks;
1146                 memcpy(info.hook_entry, private->hook_entry,
1147                        sizeof(info.hook_entry));
1148                 memcpy(info.underflow, private->underflow,
1149                        sizeof(info.underflow));
1150                 info.num_entries = private->number;
1151                 info.size = private->size;
1152                 strcpy(info.name, name);
1153
1154                 if (copy_to_user(user, &info, *len) != 0)
1155                         ret = -EFAULT;
1156                 else
1157                         ret = 0;
1158
1159                 xt_table_unlock(t);
1160                 module_put(t->me);
1161         } else
1162                 ret = t ? PTR_ERR(t) : -ENOENT;
1163 #ifdef CONFIG_COMPAT
1164         if (compat)
1165                 xt_compat_unlock(AF_INET);
1166 #endif
1167         return ret;
1168 }
1169
1170 static int
1171 get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1172             const int *len)
1173 {
1174         int ret;
1175         struct ipt_get_entries get;
1176         struct xt_table *t;
1177
1178         if (*len < sizeof(get)) {
1179                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1180                 return -EINVAL;
1181         }
1182         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1183                 return -EFAULT;
1184         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1185                 duprintf("get_entries: %u != %zu\n",
1186                          *len, sizeof(get) + get.size);
1187                 return -EINVAL;
1188         }
1189
1190         t = xt_find_table_lock(net, AF_INET, get.name);
1191         if (t && !IS_ERR(t)) {
1192                 const struct xt_table_info *private = t->private;
1193                 duprintf("t->private->number = %u\n", private->number);
1194                 if (get.size == private->size)
1195                         ret = copy_entries_to_user(private->size,
1196                                                    t, uptr->entrytable);
1197                 else {
1198                         duprintf("get_entries: I've got %u not %u!\n",
1199                                  private->size, get.size);
1200                         ret = -EAGAIN;
1201                 }
1202                 module_put(t->me);
1203                 xt_table_unlock(t);
1204         } else
1205                 ret = t ? PTR_ERR(t) : -ENOENT;
1206
1207         return ret;
1208 }
1209
1210 static int
1211 __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1212              struct xt_table_info *newinfo, unsigned int num_counters,
1213              void __user *counters_ptr)
1214 {
1215         int ret;
1216         struct xt_table *t;
1217         struct xt_table_info *oldinfo;
1218         struct xt_counters *counters;
1219         void *loc_cpu_old_entry;
1220         struct ipt_entry *iter;
1221
1222         ret = 0;
1223         counters = vmalloc(num_counters * sizeof(struct xt_counters));
1224         if (!counters) {
1225                 ret = -ENOMEM;
1226                 goto out;
1227         }
1228
1229         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1230                                     "iptable_%s", name);
1231         if (!t || IS_ERR(t)) {
1232                 ret = t ? PTR_ERR(t) : -ENOENT;
1233                 goto free_newinfo_counters_untrans;
1234         }
1235
1236         /* You lied! */
1237         if (valid_hooks != t->valid_hooks) {
1238                 duprintf("Valid hook crap: %08X vs %08X\n",
1239                          valid_hooks, t->valid_hooks);
1240                 ret = -EINVAL;
1241                 goto put_module;
1242         }
1243
1244         oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1245         if (!oldinfo)
1246                 goto put_module;
1247
1248         /* Update module usage count based on number of rules */
1249         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1250                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1251         if ((oldinfo->number > oldinfo->initial_entries) ||
1252             (newinfo->number <= oldinfo->initial_entries))
1253                 module_put(t->me);
1254         if ((oldinfo->number > oldinfo->initial_entries) &&
1255             (newinfo->number <= oldinfo->initial_entries))
1256                 module_put(t->me);
1257
1258         /* Get the old counters, and synchronize with replace */
1259         get_counters(oldinfo, counters);
1260
1261         /* Decrease module usage counts and free resource */
1262         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1263         xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
1264                 cleanup_entry(iter, net);
1265
1266         xt_free_table_info(oldinfo);
1267         if (copy_to_user(counters_ptr, counters,
1268                          sizeof(struct xt_counters) * num_counters) != 0)
1269                 ret = -EFAULT;
1270         vfree(counters);
1271         xt_table_unlock(t);
1272         return ret;
1273
1274  put_module:
1275         module_put(t->me);
1276         xt_table_unlock(t);
1277  free_newinfo_counters_untrans:
1278         vfree(counters);
1279  out:
1280         return ret;
1281 }
1282
1283 static int
1284 do_replace(struct net *net, const void __user *user, unsigned int len)
1285 {
1286         int ret;
1287         struct ipt_replace tmp;
1288         struct xt_table_info *newinfo;
1289         void *loc_cpu_entry;
1290         struct ipt_entry *iter;
1291
1292         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1293                 return -EFAULT;
1294
1295         /* overflow check */
1296         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1297                 return -ENOMEM;
1298
1299         newinfo = xt_alloc_table_info(tmp.size);
1300         if (!newinfo)
1301                 return -ENOMEM;
1302
1303         /* choose the copy that is on our node/cpu */
1304         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1305         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1306                            tmp.size) != 0) {
1307                 ret = -EFAULT;
1308                 goto free_newinfo;
1309         }
1310
1311         ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
1312         if (ret != 0)
1313                 goto free_newinfo;
1314
1315         duprintf("Translated table\n");
1316
1317         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1318                            tmp.num_counters, tmp.counters);
1319         if (ret)
1320                 goto free_newinfo_untrans;
1321         return 0;
1322
1323  free_newinfo_untrans:
1324         xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1325                 cleanup_entry(iter, net);
1326  free_newinfo:
1327         xt_free_table_info(newinfo);
1328         return ret;
1329 }
1330
1331 static int
1332 do_add_counters(struct net *net, const void __user *user,
1333                 unsigned int len, int compat)
1334 {
1335         unsigned int i, curcpu;
1336         struct xt_counters_info tmp;
1337         struct xt_counters *paddc;
1338         unsigned int num_counters;
1339         const char *name;
1340         int size;
1341         void *ptmp;
1342         struct xt_table *t;
1343         const struct xt_table_info *private;
1344         int ret = 0;
1345         void *loc_cpu_entry;
1346         struct ipt_entry *iter;
1347 #ifdef CONFIG_COMPAT
1348         struct compat_xt_counters_info compat_tmp;
1349
1350         if (compat) {
1351                 ptmp = &compat_tmp;
1352                 size = sizeof(struct compat_xt_counters_info);
1353         } else
1354 #endif
1355         {
1356                 ptmp = &tmp;
1357                 size = sizeof(struct xt_counters_info);
1358         }
1359
1360         if (copy_from_user(ptmp, user, size) != 0)
1361                 return -EFAULT;
1362
1363 #ifdef CONFIG_COMPAT
1364         if (compat) {
1365                 num_counters = compat_tmp.num_counters;
1366                 name = compat_tmp.name;
1367         } else
1368 #endif
1369         {
1370                 num_counters = tmp.num_counters;
1371                 name = tmp.name;
1372         }
1373
1374         if (len != size + num_counters * sizeof(struct xt_counters))
1375                 return -EINVAL;
1376
1377         paddc = vmalloc_node(len - size, numa_node_id());
1378         if (!paddc)
1379                 return -ENOMEM;
1380
1381         if (copy_from_user(paddc, user + size, len - size) != 0) {
1382                 ret = -EFAULT;
1383                 goto free;
1384         }
1385
1386         t = xt_find_table_lock(net, AF_INET, name);
1387         if (!t || IS_ERR(t)) {
1388                 ret = t ? PTR_ERR(t) : -ENOENT;
1389                 goto free;
1390         }
1391
1392         local_bh_disable();
1393         private = t->private;
1394         if (private->number != num_counters) {
1395                 ret = -EINVAL;
1396                 goto unlock_up_free;
1397         }
1398
1399         i = 0;
1400         /* Choose the copy that is on our node */
1401         curcpu = smp_processor_id();
1402         loc_cpu_entry = private->entries[curcpu];
1403         xt_info_wrlock(curcpu);
1404         xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1405                 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1406                 ++i;
1407         }
1408         xt_info_wrunlock(curcpu);
1409  unlock_up_free:
1410         local_bh_enable();
1411         xt_table_unlock(t);
1412         module_put(t->me);
1413  free:
1414         vfree(paddc);
1415
1416         return ret;
1417 }
1418
1419 #ifdef CONFIG_COMPAT
1420 struct compat_ipt_replace {
1421         char                    name[IPT_TABLE_MAXNAMELEN];
1422         u32                     valid_hooks;
1423         u32                     num_entries;
1424         u32                     size;
1425         u32                     hook_entry[NF_INET_NUMHOOKS];
1426         u32                     underflow[NF_INET_NUMHOOKS];
1427         u32                     num_counters;
1428         compat_uptr_t           counters;       /* struct ipt_counters * */
1429         struct compat_ipt_entry entries[0];
1430 };
1431
1432 static int
1433 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1434                           unsigned int *size, struct xt_counters *counters,
1435                           unsigned int i)
1436 {
1437         struct ipt_entry_target *t;
1438         struct compat_ipt_entry __user *ce;
1439         u_int16_t target_offset, next_offset;
1440         compat_uint_t origsize;
1441         const struct xt_entry_match *ematch;
1442         int ret = 0;
1443
1444         origsize = *size;
1445         ce = (struct compat_ipt_entry __user *)*dstptr;
1446         if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
1447             copy_to_user(&ce->counters, &counters[i],
1448             sizeof(counters[i])) != 0)
1449                 return -EFAULT;
1450
1451         *dstptr += sizeof(struct compat_ipt_entry);
1452         *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1453
1454         xt_ematch_foreach(ematch, e) {
1455                 ret = xt_compat_match_to_user(ematch, dstptr, size);
1456                 if (ret != 0)
1457                         return ret;
1458         }
1459         target_offset = e->target_offset - (origsize - *size);
1460         t = ipt_get_target(e);
1461         ret = xt_compat_target_to_user(t, dstptr, size);
1462         if (ret)
1463                 return ret;
1464         next_offset = e->next_offset - (origsize - *size);
1465         if (put_user(target_offset, &ce->target_offset) != 0 ||
1466             put_user(next_offset, &ce->next_offset) != 0)
1467                 return -EFAULT;
1468         return 0;
1469 }
1470
1471 static int
1472 compat_find_calc_match(struct ipt_entry_match *m,
1473                        const char *name,
1474                        const struct ipt_ip *ip,
1475                        unsigned int hookmask,
1476                        int *size)
1477 {
1478         struct xt_match *match;
1479
1480         match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
1481                                       m->u.user.revision);
1482         if (IS_ERR(match)) {
1483                 duprintf("compat_check_calc_match: `%s' not found\n",
1484                          m->u.user.name);
1485                 return PTR_ERR(match);
1486         }
1487         m->u.kernel.match = match;
1488         *size += xt_compat_match_offset(match);
1489         return 0;
1490 }
1491
1492 static void compat_release_entry(struct compat_ipt_entry *e)
1493 {
1494         struct ipt_entry_target *t;
1495         struct xt_entry_match *ematch;
1496
1497         /* Cleanup all matches */
1498         xt_ematch_foreach(ematch, e)
1499                 module_put(ematch->u.kernel.match->me);
1500         t = compat_ipt_get_target(e);
1501         module_put(t->u.kernel.target->me);
1502 }
1503
1504 static int
1505 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1506                                   struct xt_table_info *newinfo,
1507                                   unsigned int *size,
1508                                   const unsigned char *base,
1509                                   const unsigned char *limit,
1510                                   const unsigned int *hook_entries,
1511                                   const unsigned int *underflows,
1512                                   const char *name)
1513 {
1514         struct xt_entry_match *ematch;
1515         struct ipt_entry_target *t;
1516         struct xt_target *target;
1517         unsigned int entry_offset;
1518         unsigned int j;
1519         int ret, off, h;
1520
1521         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1522         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
1523             (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1524                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1525                 return -EINVAL;
1526         }
1527
1528         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1529                              sizeof(struct compat_xt_entry_target)) {
1530                 duprintf("checking: element %p size %u\n",
1531                          e, e->next_offset);
1532                 return -EINVAL;
1533         }
1534
1535         /* For purposes of check_entry casting the compat entry is fine */
1536         ret = check_entry((struct ipt_entry *)e, name);
1537         if (ret)
1538                 return ret;
1539
1540         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1541         entry_offset = (void *)e - (void *)base;
1542         j = 0;
1543         xt_ematch_foreach(ematch, e) {
1544                 ret = compat_find_calc_match(ematch, name,
1545                                              &e->ip, e->comefrom, &off);
1546                 if (ret != 0)
1547                         goto release_matches;
1548                 ++j;
1549         }
1550
1551         t = compat_ipt_get_target(e);
1552         target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
1553                                         t->u.user.revision);
1554         if (IS_ERR(target)) {
1555                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1556                          t->u.user.name);
1557                 ret = PTR_ERR(target);
1558                 goto release_matches;
1559         }
1560         t->u.kernel.target = target;
1561
1562         off += xt_compat_target_offset(target);
1563         *size += off;
1564         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1565         if (ret)
1566                 goto out;
1567
1568         /* Check hooks & underflows */
1569         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1570                 if ((unsigned char *)e - base == hook_entries[h])
1571                         newinfo->hook_entry[h] = hook_entries[h];
1572                 if ((unsigned char *)e - base == underflows[h])
1573                         newinfo->underflow[h] = underflows[h];
1574         }
1575
1576         /* Clear counters and comefrom */
1577         memset(&e->counters, 0, sizeof(e->counters));
1578         e->comefrom = 0;
1579         return 0;
1580
1581 out:
1582         module_put(t->u.kernel.target->me);
1583 release_matches:
1584         xt_ematch_foreach(ematch, e) {
1585                 if (j-- == 0)
1586                         break;
1587                 module_put(ematch->u.kernel.match->me);
1588         }
1589         return ret;
1590 }
1591
1592 static int
1593 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1594                             unsigned int *size, const char *name,
1595                             struct xt_table_info *newinfo, unsigned char *base)
1596 {
1597         struct ipt_entry_target *t;
1598         struct xt_target *target;
1599         struct ipt_entry *de;
1600         unsigned int origsize;
1601         int ret, h;
1602         struct xt_entry_match *ematch;
1603
1604         ret = 0;
1605         origsize = *size;
1606         de = (struct ipt_entry *)*dstptr;
1607         memcpy(de, e, sizeof(struct ipt_entry));
1608         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1609
1610         *dstptr += sizeof(struct ipt_entry);
1611         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1612
1613         xt_ematch_foreach(ematch, e) {
1614                 ret = xt_compat_match_from_user(ematch, dstptr, size);
1615                 if (ret != 0)
1616                         return ret;
1617         }
1618         de->target_offset = e->target_offset - (origsize - *size);
1619         t = compat_ipt_get_target(e);
1620         target = t->u.kernel.target;
1621         xt_compat_target_from_user(t, dstptr, size);
1622
1623         de->next_offset = e->next_offset - (origsize - *size);
1624         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1625                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1626                         newinfo->hook_entry[h] -= origsize - *size;
1627                 if ((unsigned char *)de - base < newinfo->underflow[h])
1628                         newinfo->underflow[h] -= origsize - *size;
1629         }
1630         return ret;
1631 }
1632
1633 static int
1634 compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
1635 {
1636         struct xt_entry_match *ematch;
1637         struct xt_mtchk_param mtpar;
1638         unsigned int j;
1639         int ret = 0;
1640
1641         j = 0;
1642         mtpar.net       = net;
1643         mtpar.table     = name;
1644         mtpar.entryinfo = &e->ip;
1645         mtpar.hook_mask = e->comefrom;
1646         mtpar.family    = NFPROTO_IPV4;
1647         xt_ematch_foreach(ematch, e) {
1648                 ret = check_match(ematch, &mtpar);
1649                 if (ret != 0)
1650                         goto cleanup_matches;
1651                 ++j;
1652         }
1653
1654         ret = check_target(e, net, name);
1655         if (ret)
1656                 goto cleanup_matches;
1657         return 0;
1658
1659  cleanup_matches:
1660         xt_ematch_foreach(ematch, e) {
1661                 if (j-- == 0)
1662                         break;
1663                 cleanup_match(ematch, net);
1664         }
1665         return ret;
1666 }
1667
1668 static int
1669 translate_compat_table(struct net *net,
1670                        const char *name,
1671                        unsigned int valid_hooks,
1672                        struct xt_table_info **pinfo,
1673                        void **pentry0,
1674                        unsigned int total_size,
1675                        unsigned int number,
1676                        unsigned int *hook_entries,
1677                        unsigned int *underflows)
1678 {
1679         unsigned int i, j;
1680         struct xt_table_info *newinfo, *info;
1681         void *pos, *entry0, *entry1;
1682         struct compat_ipt_entry *iter0;
1683         struct ipt_entry *iter1;
1684         unsigned int size;
1685         int ret;
1686
1687         info = *pinfo;
1688         entry0 = *pentry0;
1689         size = total_size;
1690         info->number = number;
1691
1692         /* Init all hooks to impossible value. */
1693         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1694                 info->hook_entry[i] = 0xFFFFFFFF;
1695                 info->underflow[i] = 0xFFFFFFFF;
1696         }
1697
1698         duprintf("translate_compat_table: size %u\n", info->size);
1699         j = 0;
1700         xt_compat_lock(AF_INET);
1701         /* Walk through entries, checking offsets. */
1702         xt_entry_foreach(iter0, entry0, total_size) {
1703                 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
1704                                                         entry0,
1705                                                         entry0 + total_size,
1706                                                         hook_entries,
1707                                                         underflows,
1708                                                         name);
1709                 if (ret != 0)
1710                         goto out_unlock;
1711                 ++j;
1712         }
1713
1714         ret = -EINVAL;
1715         if (j != number) {
1716                 duprintf("translate_compat_table: %u not %u entries\n",
1717                          j, number);
1718                 goto out_unlock;
1719         }
1720
1721         /* Check hooks all assigned */
1722         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1723                 /* Only hooks which are valid */
1724                 if (!(valid_hooks & (1 << i)))
1725                         continue;
1726                 if (info->hook_entry[i] == 0xFFFFFFFF) {
1727                         duprintf("Invalid hook entry %u %u\n",
1728                                  i, hook_entries[i]);
1729                         goto out_unlock;
1730                 }
1731                 if (info->underflow[i] == 0xFFFFFFFF) {
1732                         duprintf("Invalid underflow %u %u\n",
1733                                  i, underflows[i]);
1734                         goto out_unlock;
1735                 }
1736         }
1737
1738         ret = -ENOMEM;
1739         newinfo = xt_alloc_table_info(size);
1740         if (!newinfo)
1741                 goto out_unlock;
1742
1743         newinfo->number = number;
1744         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1745                 newinfo->hook_entry[i] = info->hook_entry[i];
1746                 newinfo->underflow[i] = info->underflow[i];
1747         }
1748         entry1 = newinfo->entries[raw_smp_processor_id()];
1749         pos = entry1;
1750         size = total_size;
1751         xt_entry_foreach(iter0, entry0, total_size) {
1752                 ret = compat_copy_entry_from_user(iter0, &pos, &size,
1753                                                   name, newinfo, entry1);
1754                 if (ret != 0)
1755                         break;
1756         }
1757         xt_compat_flush_offsets(AF_INET);
1758         xt_compat_unlock(AF_INET);
1759         if (ret)
1760                 goto free_newinfo;
1761
1762         ret = -ELOOP;
1763         if (!mark_source_chains(newinfo, valid_hooks, entry1))
1764                 goto free_newinfo;
1765
1766         i = 0;
1767         xt_entry_foreach(iter1, entry1, newinfo->size) {
1768                 ret = compat_check_entry(iter1, net, name);
1769                 if (ret != 0)
1770                         break;
1771                 ++i;
1772         }
1773         if (ret) {
1774                 /*
1775                  * The first i matches need cleanup_entry (calls ->destroy)
1776                  * because they had called ->check already. The other j-i
1777                  * entries need only release.
1778                  */
1779                 int skip = i;
1780                 j -= i;
1781                 xt_entry_foreach(iter0, entry0, newinfo->size) {
1782                         if (skip-- > 0)
1783                                 continue;
1784                         if (j-- == 0)
1785                                 break;
1786                         compat_release_entry(iter0);
1787                 }
1788                 xt_entry_foreach(iter1, entry1, newinfo->size) {
1789                         if (i-- == 0)
1790                                 break;
1791                         cleanup_entry(iter1, net);
1792                 }
1793                 xt_free_table_info(newinfo);
1794                 return ret;
1795         }
1796
1797         /* And one copy for every other CPU */
1798         for_each_possible_cpu(i)
1799                 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1800                         memcpy(newinfo->entries[i], entry1, newinfo->size);
1801
1802         *pinfo = newinfo;
1803         *pentry0 = entry1;
1804         xt_free_table_info(info);
1805         return 0;
1806
1807 free_newinfo:
1808         xt_free_table_info(newinfo);
1809 out:
1810         xt_entry_foreach(iter0, entry0, total_size) {
1811                 if (j-- == 0)
1812                         break;
1813                 compat_release_entry(iter0);
1814         }
1815         return ret;
1816 out_unlock:
1817         xt_compat_flush_offsets(AF_INET);
1818         xt_compat_unlock(AF_INET);
1819         goto out;
1820 }
1821
1822 static int
1823 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1824 {
1825         int ret;
1826         struct compat_ipt_replace tmp;
1827         struct xt_table_info *newinfo;
1828         void *loc_cpu_entry;
1829         struct ipt_entry *iter;
1830
1831         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1832                 return -EFAULT;
1833
1834         /* overflow check */
1835         if (tmp.size >= INT_MAX / num_possible_cpus())
1836                 return -ENOMEM;
1837         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1838                 return -ENOMEM;
1839
1840         newinfo = xt_alloc_table_info(tmp.size);
1841         if (!newinfo)
1842                 return -ENOMEM;
1843
1844         /* choose the copy that is on our node/cpu */
1845         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1846         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1847                            tmp.size) != 0) {
1848                 ret = -EFAULT;
1849                 goto free_newinfo;
1850         }
1851
1852         ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
1853                                      &newinfo, &loc_cpu_entry, tmp.size,
1854                                      tmp.num_entries, tmp.hook_entry,
1855                                      tmp.underflow);
1856         if (ret != 0)
1857                 goto free_newinfo;
1858
1859         duprintf("compat_do_replace: Translated table\n");
1860
1861         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1862                            tmp.num_counters, compat_ptr(tmp.counters));
1863         if (ret)
1864                 goto free_newinfo_untrans;
1865         return 0;
1866
1867  free_newinfo_untrans:
1868         xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1869                 cleanup_entry(iter, net);
1870  free_newinfo:
1871         xt_free_table_info(newinfo);
1872         return ret;
1873 }
1874
1875 static int
1876 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1877                       unsigned int len)
1878 {
1879         int ret;
1880
1881         if (!capable(CAP_NET_ADMIN))
1882                 return -EPERM;
1883
1884         switch (cmd) {
1885         case IPT_SO_SET_REPLACE:
1886                 ret = compat_do_replace(sock_net(sk), user, len);
1887                 break;
1888
1889         case IPT_SO_SET_ADD_COUNTERS:
1890                 ret = do_add_counters(sock_net(sk), user, len, 1);
1891                 break;
1892
1893         default:
1894                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1895                 ret = -EINVAL;
1896         }
1897
1898         return ret;
1899 }
1900
1901 struct compat_ipt_get_entries {
1902         char name[IPT_TABLE_MAXNAMELEN];
1903         compat_uint_t size;
1904         struct compat_ipt_entry entrytable[0];
1905 };
1906
1907 static int
1908 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1909                             void __user *userptr)
1910 {
1911         struct xt_counters *counters;
1912         const struct xt_table_info *private = table->private;
1913         void __user *pos;
1914         unsigned int size;
1915         int ret = 0;
1916         const void *loc_cpu_entry;
1917         unsigned int i = 0;
1918         struct ipt_entry *iter;
1919
1920         counters = alloc_counters(table);
1921         if (IS_ERR(counters))
1922                 return PTR_ERR(counters);
1923
1924         /* choose the copy that is on our node/cpu, ...
1925          * This choice is lazy (because current thread is
1926          * allowed to migrate to another cpu)
1927          */
1928         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1929         pos = userptr;
1930         size = total_size;
1931         xt_entry_foreach(iter, loc_cpu_entry, total_size) {
1932                 ret = compat_copy_entry_to_user(iter, &pos,
1933                                                 &size, counters, i++);
1934                 if (ret != 0)
1935                         break;
1936         }
1937
1938         vfree(counters);
1939         return ret;
1940 }
1941
1942 static int
1943 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1944                    int *len)
1945 {
1946         int ret;
1947         struct compat_ipt_get_entries get;
1948         struct xt_table *t;
1949
1950         if (*len < sizeof(get)) {
1951                 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1952                 return -EINVAL;
1953         }
1954
1955         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1956                 return -EFAULT;
1957
1958         if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1959                 duprintf("compat_get_entries: %u != %zu\n",
1960                          *len, sizeof(get) + get.size);
1961                 return -EINVAL;
1962         }
1963
1964         xt_compat_lock(AF_INET);
1965         t = xt_find_table_lock(net, AF_INET, get.name);
1966         if (t && !IS_ERR(t)) {
1967                 const struct xt_table_info *private = t->private;
1968                 struct xt_table_info info;
1969                 duprintf("t->private->number = %u\n", private->number);
1970                 ret = compat_table_info(private, &info);
1971                 if (!ret && get.size == info.size) {
1972                         ret = compat_copy_entries_to_user(private->size,
1973                                                           t, uptr->entrytable);
1974                 } else if (!ret) {
1975                         duprintf("compat_get_entries: I've got %u not %u!\n",
1976                                  private->size, get.size);
1977                         ret = -EAGAIN;
1978                 }
1979                 xt_compat_flush_offsets(AF_INET);
1980                 module_put(t->me);
1981                 xt_table_unlock(t);
1982         } else
1983                 ret = t ? PTR_ERR(t) : -ENOENT;
1984
1985         xt_compat_unlock(AF_INET);
1986         return ret;
1987 }
1988
1989 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1990
1991 static int
1992 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1993 {
1994         int ret;
1995
1996         if (!capable(CAP_NET_ADMIN))
1997                 return -EPERM;
1998
1999         switch (cmd) {
2000         case IPT_SO_GET_INFO:
2001                 ret = get_info(sock_net(sk), user, len, 1);
2002                 break;
2003         case IPT_SO_GET_ENTRIES:
2004                 ret = compat_get_entries(sock_net(sk), user, len);
2005                 break;
2006         default:
2007                 ret = do_ipt_get_ctl(sk, cmd, user, len);
2008         }
2009         return ret;
2010 }
2011 #endif
2012
2013 static int
2014 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2015 {
2016         int ret;
2017
2018         if (!capable(CAP_NET_ADMIN))
2019                 return -EPERM;
2020
2021         switch (cmd) {
2022         case IPT_SO_SET_REPLACE:
2023                 ret = do_replace(sock_net(sk), user, len);
2024                 break;
2025
2026         case IPT_SO_SET_ADD_COUNTERS:
2027                 ret = do_add_counters(sock_net(sk), user, len, 0);
2028                 break;
2029
2030         default:
2031                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
2032                 ret = -EINVAL;
2033         }
2034
2035         return ret;
2036 }
2037
2038 static int
2039 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2040 {
2041         int ret;
2042
2043         if (!capable(CAP_NET_ADMIN))
2044                 return -EPERM;
2045
2046         switch (cmd) {
2047         case IPT_SO_GET_INFO:
2048                 ret = get_info(sock_net(sk), user, len, 0);
2049                 break;
2050
2051         case IPT_SO_GET_ENTRIES:
2052                 ret = get_entries(sock_net(sk), user, len);
2053                 break;
2054
2055         case IPT_SO_GET_REVISION_MATCH:
2056         case IPT_SO_GET_REVISION_TARGET: {
2057                 struct ipt_get_revision rev;
2058                 int target;
2059
2060                 if (*len != sizeof(rev)) {
2061                         ret = -EINVAL;
2062                         break;
2063                 }
2064                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
2065                         ret = -EFAULT;
2066                         break;
2067                 }
2068
2069                 if (cmd == IPT_SO_GET_REVISION_TARGET)
2070                         target = 1;
2071                 else
2072                         target = 0;
2073
2074                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
2075                                                          rev.revision,
2076                                                          target, &ret),
2077                                         "ipt_%s", rev.name);
2078                 break;
2079         }
2080
2081         default:
2082                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
2083                 ret = -EINVAL;
2084         }
2085
2086         return ret;
2087 }
2088
2089 struct xt_table *ipt_register_table(struct net *net,
2090                                     const struct xt_table *table,
2091                                     const struct ipt_replace *repl)
2092 {
2093         int ret;
2094         struct xt_table_info *newinfo;
2095         struct xt_table_info bootstrap = {0};
2096         void *loc_cpu_entry;
2097         struct xt_table *new_table;
2098
2099         newinfo = xt_alloc_table_info(repl->size);
2100         if (!newinfo) {
2101                 ret = -ENOMEM;
2102                 goto out;
2103         }
2104
2105         /* choose the copy on our node/cpu, but dont care about preemption */
2106         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2107         memcpy(loc_cpu_entry, repl->entries, repl->size);
2108
2109         ret = translate_table(net, newinfo, loc_cpu_entry, repl);
2110         if (ret != 0)
2111                 goto out_free;
2112
2113         new_table = xt_register_table(net, table, &bootstrap, newinfo);
2114         if (IS_ERR(new_table)) {
2115                 ret = PTR_ERR(new_table);
2116                 goto out_free;
2117         }
2118
2119         return new_table;
2120
2121 out_free:
2122         xt_free_table_info(newinfo);
2123 out:
2124         return ERR_PTR(ret);
2125 }
2126
2127 void ipt_unregister_table(struct net *net, struct xt_table *table)
2128 {
2129         struct xt_table_info *private;
2130         void *loc_cpu_entry;
2131         struct module *table_owner = table->me;
2132         struct ipt_entry *iter;
2133
2134         private = xt_unregister_table(table);
2135
2136         /* Decrease module usage counts and free resources */
2137         loc_cpu_entry = private->entries[raw_smp_processor_id()];
2138         xt_entry_foreach(iter, loc_cpu_entry, private->size)
2139                 cleanup_entry(iter, net);
2140         if (private->number > private->initial_entries)
2141                 module_put(table_owner);
2142         xt_free_table_info(private);
2143 }
2144
/*
 * Decide whether an ICMP (type, code) pair is covered by a rule.
 *
 * A rule type of 0xFF matches any packet; otherwise the type must be equal
 * and the code must lie in [min_code, max_code]. The result is flipped when
 * @invert is set.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     bool invert)
{
        bool hit;

        if (test_type == 0xFF)
                hit = true;
        else
                hit = type == test_type &&
                      min_code <= code && code <= max_code;

        return hit != invert;
}
2155
2156 static bool
2157 icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2158 {
2159         const struct icmphdr *ic;
2160         struct icmphdr _icmph;
2161         const struct ipt_icmp *icmpinfo = par->matchinfo;
2162
2163         /* Must not be a fragment. */
2164         if (par->fragoff != 0)
2165                 return false;
2166
2167         ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
2168         if (ic == NULL) {
2169                 /* We've been asked to examine this packet, and we
2170                  * can't.  Hence, no choice but to drop.
2171                  */
2172                 duprintf("Dropping evil ICMP tinygram.\n");
2173                 *par->hotdrop = true;
2174                 return false;
2175         }
2176
2177         return icmp_type_code_match(icmpinfo->type,
2178                                     icmpinfo->code[0],
2179                                     icmpinfo->code[1],
2180                                     ic->type, ic->code,
2181                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
2182 }
2183
2184 static int icmp_checkentry(const struct xt_mtchk_param *par)
2185 {
2186         const struct ipt_icmp *icmpinfo = par->matchinfo;
2187
2188         /* Must specify no unknown invflags */
2189         return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
2190 }
2191
2192 /* The built-in targets: standard (NULL) and error. */
2193 static struct xt_target ipt_standard_target __read_mostly = {
2194         .name           = IPT_STANDARD_TARGET,
2195         .targetsize     = sizeof(int),
2196         .family         = NFPROTO_IPV4,
2197 #ifdef CONFIG_COMPAT
2198         .compatsize     = sizeof(compat_int_t),
2199         .compat_from_user = compat_standard_from_user,
2200         .compat_to_user = compat_standard_to_user,
2201 #endif
2202 };
2203
2204 static struct xt_target ipt_error_target __read_mostly = {
2205         .name           = IPT_ERROR_TARGET,
2206         .target         = ipt_error,
2207         .targetsize     = IPT_FUNCTION_MAXNAMELEN,
2208         .family         = NFPROTO_IPV4,
2209 };
2210
2211 static struct nf_sockopt_ops ipt_sockopts = {
2212         .pf             = PF_INET,
2213         .set_optmin     = IPT_BASE_CTL,
2214         .set_optmax     = IPT_SO_SET_MAX+1,
2215         .set            = do_ipt_set_ctl,
2216 #ifdef CONFIG_COMPAT
2217         .compat_set     = compat_do_ipt_set_ctl,
2218 #endif
2219         .get_optmin     = IPT_BASE_CTL,
2220         .get_optmax     = IPT_SO_GET_MAX+1,
2221         .get            = do_ipt_get_ctl,
2222 #ifdef CONFIG_COMPAT
2223         .compat_get     = compat_do_ipt_get_ctl,
2224 #endif
2225         .owner          = THIS_MODULE,
2226 };
2227
2228 static struct xt_match icmp_matchstruct __read_mostly = {
2229         .name           = "icmp",
2230         .match          = icmp_match,
2231         .matchsize      = sizeof(struct ipt_icmp),
2232         .checkentry     = icmp_checkentry,
2233         .proto          = IPPROTO_ICMP,
2234         .family         = NFPROTO_IPV4,
2235 };
2236
2237 static int __net_init ip_tables_net_init(struct net *net)
2238 {
2239         return xt_proto_init(net, NFPROTO_IPV4);
2240 }
2241
2242 static void __net_exit ip_tables_net_exit(struct net *net)
2243 {
2244         xt_proto_fini(net, NFPROTO_IPV4);
2245 }
2246
2247 static struct pernet_operations ip_tables_net_ops = {
2248         .init = ip_tables_net_init,
2249         .exit = ip_tables_net_exit,
2250 };
2251
2252 static int __init ip_tables_init(void)
2253 {
2254         int ret;
2255
2256         ret = register_pernet_subsys(&ip_tables_net_ops);
2257         if (ret < 0)
2258                 goto err1;
2259
2260         /* Noone else will be downing sem now, so we won't sleep */
2261         ret = xt_register_target(&ipt_standard_target);
2262         if (ret < 0)
2263                 goto err2;
2264         ret = xt_register_target(&ipt_error_target);
2265         if (ret < 0)
2266                 goto err3;
2267         ret = xt_register_match(&icmp_matchstruct);
2268         if (ret < 0)
2269                 goto err4;
2270
2271         /* Register setsockopt */
2272         ret = nf_register_sockopt(&ipt_sockopts);
2273         if (ret < 0)
2274                 goto err5;
2275
2276         pr_info("(C) 2000-2006 Netfilter Core Team\n");
2277         return 0;
2278
2279 err5:
2280         xt_unregister_match(&icmp_matchstruct);
2281 err4:
2282         xt_unregister_target(&ipt_error_target);
2283 err3:
2284         xt_unregister_target(&ipt_standard_target);
2285 err2:
2286         unregister_pernet_subsys(&ip_tables_net_ops);
2287 err1:
2288         return ret;
2289 }
2290
2291 static void __exit ip_tables_fini(void)
2292 {
2293         nf_unregister_sockopt(&ipt_sockopts);
2294
2295         xt_unregister_match(&icmp_matchstruct);
2296         xt_unregister_target(&ipt_error_target);
2297         xt_unregister_target(&ipt_standard_target);
2298
2299         unregister_pernet_subsys(&ip_tables_net_ops);
2300 }
2301
/* Module entry and exit points. */
module_init(ip_tables_init);
module_exit(ip_tables_fini);

/* Symbols exported for use by other modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);