1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 #include <linux/cache.h>
13 #include <linux/capability.h>
14 #include <linux/skbuff.h>
15 #include <linux/kmod.h>
16 #include <linux/vmalloc.h>
17 #include <linux/netdevice.h>
18 #include <linux/module.h>
19 #include <linux/icmp.h>
20 #include <net/ip.h>
21 #include <net/compat.h>
22 #include <asm/uaccess.h>
23 #include <linux/mutex.h>
24 #include <linux/proc_fs.h>
25 #include <linux/err.h>
26 #include <linux/cpumask.h>
27
28 #include <linux/netfilter/x_tables.h>
29 #include <linux/netfilter_ipv4/ip_tables.h>
30 #include <net/netfilter/nf_log.h>
31
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
35
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
39
40 #ifdef DEBUG_IP_FIREWALL
41 #define dprintf(format, args...)  printk(format , ## args)
42 #else
43 #define dprintf(format, args...)
44 #endif
45
46 #ifdef DEBUG_IP_FIREWALL_USER
47 #define duprintf(format, args...) printk(format , ## args)
48 #else
49 #define duprintf(format, args...)
50 #endif
51
52 #ifdef CONFIG_NETFILTER_DEBUG
53 #define IP_NF_ASSERT(x)                                         \
54 do {                                                            \
55         if (!(x))                                               \
56                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
57                        __func__, __FILE__, __LINE__);   \
58 } while(0)
59 #else
60 #define IP_NF_ASSERT(x)
61 #endif
62
63 #if 0
64 /* All the better to debug you with... */
65 #define static
66 #define inline
67 #endif
68
69 /*
70    We keep a set of rules for each CPU, so we can avoid write-locking
71    them in the softirq when updating the counters and therefore
72    only need to read-lock in the softirq; doing a write_lock_bh() in user
73    context stops packets coming through and allows user context to read
74    the counters or update the rules.
75
76    Hence the start of any table is given by get_table() below.  */
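/* Illustrative summary of the resulting locking pattern, as used further
   down in this file (the xt_info_* helpers come from x_tables):

	packet path (softirq)		counter readers / table replace
	---------------------		-------------------------------
	xt_info_rdlock_bh();		xt_info_wrlock(cpu);
	  ...walk rules,		  ...fold that CPU's counters
	     bump e->counters...	xt_info_wrunlock(cpu);
	xt_info_rdunlock_bh();

   See ipt_do_table() and get_counters() below. */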
77
78 /* Returns whether matches rule or not. */
79 /* Performance critical - called for every packet */
80 static inline bool
81 ip_packet_match(const struct iphdr *ip,
82                 const char *indev,
83                 const char *outdev,
84                 const struct ipt_ip *ipinfo,
85                 int isfrag)
86 {
87         unsigned long ret;
88
89 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
90
91         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
92                   IPT_INV_SRCIP) ||
93             FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
94                   IPT_INV_DSTIP)) {
95                 dprintf("Source or dest mismatch.\n");
96
97                 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
98                         &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
99                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
100                 dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
101                         &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
102                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
103                 return false;
104         }
105
106         ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
107
108         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
109                 dprintf("VIA in mismatch (%s vs %s).%s\n",
110                         indev, ipinfo->iniface,
111                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
112                 return false;
113         }
114
115         ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
116
117         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
118                 dprintf("VIA out mismatch (%s vs %s).%s\n",
119                         outdev, ipinfo->outiface,
120                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
121                 return false;
122         }
123
124         /* Check specific protocol */
125         if (ipinfo->proto &&
126             FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
127                 dprintf("Packet protocol %hi does not match %hi.%s\n",
128                         ip->protocol, ipinfo->proto,
129                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
130                 return false;
131         }
132
133         /* If we have a fragment rule but the packet is not a fragment
134          * then we return false */
135         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
136                 dprintf("Fragment rule but not fragment.%s\n",
137                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
138                 return false;
139         }
140
141         return true;
142 }
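/* Worked example for the FWINV() helper used above (illustrative, not
 * part of the matching logic itself): FWINV(test, flag) XORs the raw
 * mismatch test with "is the corresponding IPT_INV_* bit set", so for
 * the source address check:
 *
 *	address mismatch?  IPT_INV_SRCIP set?  FWINV()  rule matches?
 *	       no                 no            false        yes
 *	       yes                no            true         no
 *	       no                 yes           true         no
 *	       yes                yes           false        yes
 *
 * i.e. "-s 1.2.3.4" fails on a mismatch while "! -s 1.2.3.4" fails on
 * a match; the same inversion applies to the interface, protocol and
 * fragment checks. */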
143
144 static bool
145 ip_checkentry(const struct ipt_ip *ip)
146 {
147         if (ip->flags & ~IPT_F_MASK) {
148                 duprintf("Unknown flag bits set: %08X\n",
149                          ip->flags & ~IPT_F_MASK);
150                 return false;
151         }
152         if (ip->invflags & ~IPT_INV_MASK) {
153                 duprintf("Unknown invflag bits set: %08X\n",
154                          ip->invflags & ~IPT_INV_MASK);
155                 return false;
156         }
157         return true;
158 }
159
160 static unsigned int
161 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
162 {
163         if (net_ratelimit())
164                 printk("ip_tables: error: `%s'\n",
165                        (const char *)par->targinfo);
166
167         return NF_DROP;
168 }
169
170 /* Performance critical - called for every packet */
171 static inline bool
172 do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
173          struct xt_match_param *par)
174 {
175         par->match     = m->u.kernel.match;
176         par->matchinfo = m->data;
177
178         /* Stop iteration if it doesn't match */
179         if (!m->u.kernel.match->match(skb, par))
180                 return true;
181         else
182                 return false;
183 }
184
185 /* Performance critical */
186 static inline struct ipt_entry *
187 get_entry(void *base, unsigned int offset)
188 {
189         return (struct ipt_entry *)(base + offset);
190 }
191
192 /* All zeroes == unconditional rule. */
193 /* Mildly perf critical (only if packet tracing is on) */
194 static inline bool unconditional(const struct ipt_ip *ip)
195 {
196         static const struct ipt_ip uncond;
197
198         return memcmp(ip, &uncond, sizeof(uncond)) == 0;
199 #undef FWINV
200 }
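/* Illustrative note: "unconditional" simply means every field of
 * struct ipt_ip is zero -- no addresses, masks or interface names, and
 * proto/flags/invflags all 0 -- which is what a rule without a match
 * part looks like, e.g. a chain policy or the implicit return at the
 * end of a user-defined chain. */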
201
202 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
203     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
204 static const char *const hooknames[] = {
205         [NF_INET_PRE_ROUTING]           = "PREROUTING",
206         [NF_INET_LOCAL_IN]              = "INPUT",
207         [NF_INET_FORWARD]               = "FORWARD",
208         [NF_INET_LOCAL_OUT]             = "OUTPUT",
209         [NF_INET_POST_ROUTING]          = "POSTROUTING",
210 };
211
212 enum nf_ip_trace_comments {
213         NF_IP_TRACE_COMMENT_RULE,
214         NF_IP_TRACE_COMMENT_RETURN,
215         NF_IP_TRACE_COMMENT_POLICY,
216 };
217
218 static const char *const comments[] = {
219         [NF_IP_TRACE_COMMENT_RULE]      = "rule",
220         [NF_IP_TRACE_COMMENT_RETURN]    = "return",
221         [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
222 };
223
224 static struct nf_loginfo trace_loginfo = {
225         .type = NF_LOG_TYPE_LOG,
226         .u = {
227                 .log = {
228                         .level = 4,
229                         .logflags = NF_LOG_MASK,
230                 },
231         },
232 };
233
234 /* Mildly perf critical (only if packet tracing is on) */
235 static inline int
236 get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
237                       const char *hookname, const char **chainname,
238                       const char **comment, unsigned int *rulenum)
239 {
240         struct ipt_standard_target *t = (void *)ipt_get_target(s);
241
242         if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
243                 /* Head of user chain: ERROR target with chainname */
244                 *chainname = t->target.data;
245                 (*rulenum) = 0;
246         } else if (s == e) {
247                 (*rulenum)++;
248
249                 if (s->target_offset == sizeof(struct ipt_entry) &&
250                     strcmp(t->target.u.kernel.target->name,
251                            IPT_STANDARD_TARGET) == 0 &&
252                    t->verdict < 0 &&
253                    unconditional(&s->ip)) {
254                         /* Tail of chains: STANDARD target (return/policy) */
255                         *comment = *chainname == hookname
256                                 ? comments[NF_IP_TRACE_COMMENT_POLICY]
257                                 : comments[NF_IP_TRACE_COMMENT_RETURN];
258                 }
259                 return 1;
260         } else
261                 (*rulenum)++;
262
263         return 0;
264 }
265
266 static void trace_packet(struct sk_buff *skb,
267                          unsigned int hook,
268                          const struct net_device *in,
269                          const struct net_device *out,
270                          const char *tablename,
271                          struct xt_table_info *private,
272                          struct ipt_entry *e)
273 {
274         void *table_base;
275         const struct ipt_entry *root;
276         const char *hookname, *chainname, *comment;
277         unsigned int rulenum = 0;
278
279         table_base = private->entries[smp_processor_id()];
280         root = get_entry(table_base, private->hook_entry[hook]);
281
282         hookname = chainname = hooknames[hook];
283         comment = comments[NF_IP_TRACE_COMMENT_RULE];
284
285         IPT_ENTRY_ITERATE(root,
286                           private->size - private->hook_entry[hook],
287                           get_chainname_rulenum,
288                           e, hookname, &chainname, &comment, &rulenum);
289
290         nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
291                       "TRACE: %s:%s:%s:%u ",
292                       tablename, chainname, comment, rulenum);
293 }
294 #endif
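/* Example of the trace output generated above (illustrative): for the
 * second rule of a user-defined chain "foo" in the filter table,
 * nf_log_packet() is called with a prefix of the form
 *
 *	TRACE: filter:foo:rule:2
 *
 * while the policy entry of a built-in chain is tagged "policy" and
 * the implicit return at the end of a user chain is tagged "return",
 * as selected in get_chainname_rulenum(). */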
295
296 static inline __pure
297 struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
298 {
299         return (void *)entry + entry->next_offset;
300 }
301
302 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
303 unsigned int
304 ipt_do_table(struct sk_buff *skb,
305              unsigned int hook,
306              const struct net_device *in,
307              const struct net_device *out,
308              struct xt_table *table)
309 {
310 #define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
311
312         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
313         const struct iphdr *ip;
314         bool hotdrop = false;
315         /* Initializing verdict to NF_DROP keeps gcc happy. */
316         unsigned int verdict = NF_DROP;
317         const char *indev, *outdev;
318         void *table_base;
319         struct ipt_entry *e, *back;
320         struct xt_table_info *private;
321         struct xt_match_param mtpar;
322         struct xt_target_param tgpar;
323
324         /* Initialization */
325         ip = ip_hdr(skb);
326         indev = in ? in->name : nulldevname;
327         outdev = out ? out->name : nulldevname;
328         /* We handle fragments by dealing with the first fragment as
329          * if it was a normal packet.  All other fragments are treated
330          * normally, except that they will NEVER match rules that ask
331          * things we don't know (ie. tcp syn flag or ports).  If the
332          * rule is also a fragment-specific rule, non-fragments won't
333          * match it. */
334         mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
335         mtpar.thoff   = ip_hdrlen(skb);
336         mtpar.hotdrop = &hotdrop;
337         mtpar.in      = tgpar.in  = in;
338         mtpar.out     = tgpar.out = out;
339         mtpar.family  = tgpar.family = NFPROTO_IPV4;
340         mtpar.hooknum = tgpar.hooknum = hook;
341
342         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
343         xt_info_rdlock_bh();
344         private = table->private;
345         table_base = private->entries[smp_processor_id()];
346
347         e = get_entry(table_base, private->hook_entry[hook]);
348
349         /* For return from builtin chain */
350         back = get_entry(table_base, private->underflow[hook]);
351
352         do {
353                 struct ipt_entry_target *t;
354
355                 IP_NF_ASSERT(e);
356                 IP_NF_ASSERT(back);
357                 if (!ip_packet_match(ip, indev, outdev,
358                     &e->ip, mtpar.fragoff) ||
359                     IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
360                         e = ipt_next_entry(e);
361                         continue;
362                 }
363
364                 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
365
366                 t = ipt_get_target(e);
367                 IP_NF_ASSERT(t->u.kernel.target);
368
369 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
370     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
371                 /* The packet is traced: log it */
372                 if (unlikely(skb->nf_trace))
373                         trace_packet(skb, hook, in, out,
374                                      table->name, private, e);
375 #endif
376                 /* Standard target? */
377                 if (!t->u.kernel.target->target) {
378                         int v;
379
380                         v = ((struct ipt_standard_target *)t)->verdict;
381                         if (v < 0) {
382                                 /* Pop from stack? */
383                                 if (v != IPT_RETURN) {
384                                         verdict = (unsigned)(-v) - 1;
385                                         break;
386                                 }
387                                 e = back;
388                                 back = get_entry(table_base, back->comefrom);
389                                 continue;
390                         }
391                         if (table_base + v != ipt_next_entry(e) &&
392                             !(e->ip.flags & IPT_F_GOTO)) {
393                                 /* Save old back ptr in next entry */
394                                 struct ipt_entry *next = ipt_next_entry(e);
395                                 next->comefrom = (void *)back - table_base;
396                                 /* set back pointer to next entry */
397                                 back = next;
398                         }
399
400                         e = get_entry(table_base, v);
401                         continue;
402                 }
403
404                 /* Targets which reenter must return
405                    abs. verdicts */
406                 tgpar.target   = t->u.kernel.target;
407                 tgpar.targinfo = t->data;
408
409
410 #ifdef CONFIG_NETFILTER_DEBUG
411                 tb_comefrom = 0xeeeeeeec;
412 #endif
413                 verdict = t->u.kernel.target->target(skb, &tgpar);
414 #ifdef CONFIG_NETFILTER_DEBUG
415                 if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
416                         printk("Target %s reentered!\n",
417                                t->u.kernel.target->name);
418                         verdict = NF_DROP;
419                 }
420                 tb_comefrom = 0x57acc001;
421 #endif
422                 /* Target might have changed stuff. */
423                 ip = ip_hdr(skb);
424                 if (verdict == IPT_CONTINUE)
425                         e = ipt_next_entry(e);
426                 else
427                         /* Verdict */
428                         break;
429         } while (!hotdrop);
430         xt_info_rdunlock_bh();
431
432 #ifdef DEBUG_ALLOW_ALL
433         return NF_ACCEPT;
434 #else
435         if (hotdrop)
436                 return NF_DROP;
437         else return verdict;
438 #endif
439
440 #undef tb_comefrom
441 }
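#if 0
/* Minimal usage sketch for ipt_do_table() (illustrative only; the
 * table pointer "example_table" is a placeholder -- real callers such
 * as iptable_filter keep their xt_table in struct net and pass that):
 * a netfilter hook simply forwards its arguments plus the table. */
static unsigned int
example_hook(unsigned int hooknum, struct sk_buff *skb,
	     const struct net_device *in, const struct net_device *out,
	     int (*okfn)(struct sk_buff *))
{
	extern struct xt_table *example_table;	/* placeholder */

	return ipt_do_table(skb, hooknum, in, out, example_table);
}
#endif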
442
443 /* Figures out from what hook each rule can be called: returns 0 if
444    there are loops.  Puts hook bitmask in comefrom. */
445 static int
446 mark_source_chains(struct xt_table_info *newinfo,
447                    unsigned int valid_hooks, void *entry0)
448 {
449         unsigned int hook;
450
451         /* No recursion; use packet counter to save back ptrs (reset
452            to 0 as we leave), and comefrom to save source hook bitmask */
453         for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
454                 unsigned int pos = newinfo->hook_entry[hook];
455                 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
456
457                 if (!(valid_hooks & (1 << hook)))
458                         continue;
459
460                 /* Set initial back pointer. */
461                 e->counters.pcnt = pos;
462
463                 for (;;) {
464                         struct ipt_standard_target *t
465                                 = (void *)ipt_get_target(e);
466                         int visited = e->comefrom & (1 << hook);
467
468                         if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
469                                 printk("iptables: loop hook %u pos %u %08X.\n",
470                                        hook, pos, e->comefrom);
471                                 return 0;
472                         }
473                         e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
474
475                         /* Unconditional return/END. */
476                         if ((e->target_offset == sizeof(struct ipt_entry) &&
477                              (strcmp(t->target.u.user.name,
478                                      IPT_STANDARD_TARGET) == 0) &&
479                              t->verdict < 0 && unconditional(&e->ip)) ||
480                             visited) {
481                                 unsigned int oldpos, size;
482
483                                 if ((strcmp(t->target.u.user.name,
484                                             IPT_STANDARD_TARGET) == 0) &&
485                                     t->verdict < -NF_MAX_VERDICT - 1) {
486                                         duprintf("mark_source_chains: bad "
487                                                 "negative verdict (%i)\n",
488                                                                 t->verdict);
489                                         return 0;
490                                 }
491
492                                 /* Return: backtrack through the last
493                                    big jump. */
494                                 do {
495                                         e->comefrom ^= (1<<NF_INET_NUMHOOKS);
496 #ifdef DEBUG_IP_FIREWALL_USER
497                                         if (e->comefrom
498                                             & (1 << NF_INET_NUMHOOKS)) {
499                                                 duprintf("Back unset "
500                                                          "on hook %u "
501                                                          "rule %u\n",
502                                                          hook, pos);
503                                         }
504 #endif
505                                         oldpos = pos;
506                                         pos = e->counters.pcnt;
507                                         e->counters.pcnt = 0;
508
509                                         /* We're at the start. */
510                                         if (pos == oldpos)
511                                                 goto next;
512
513                                         e = (struct ipt_entry *)
514                                                 (entry0 + pos);
515                                 } while (oldpos == pos + e->next_offset);
516
517                                 /* Move along one */
518                                 size = e->next_offset;
519                                 e = (struct ipt_entry *)
520                                         (entry0 + pos + size);
521                                 e->counters.pcnt = pos;
522                                 pos += size;
523                         } else {
524                                 int newpos = t->verdict;
525
526                                 if (strcmp(t->target.u.user.name,
527                                            IPT_STANDARD_TARGET) == 0 &&
528                                     newpos >= 0) {
529                                         if (newpos > newinfo->size -
530                                                 sizeof(struct ipt_entry)) {
531                                                 duprintf("mark_source_chains: "
532                                                         "bad verdict (%i)\n",
533                                                                 newpos);
534                                                 return 0;
535                                         }
536                                         /* This a jump; chase it. */
537                                         duprintf("Jump rule %u -> %u\n",
538                                                  pos, newpos);
539                                 } else {
540                                         /* ... this is a fallthru */
541                                         newpos = pos + e->next_offset;
542                                 }
543                                 e = (struct ipt_entry *)
544                                         (entry0 + newpos);
545                                 e->counters.pcnt = pos;
546                                 pos = newpos;
547                         }
548                 }
549                 next:
550                 duprintf("Finished chain %u\n", hook);
551         }
552         return 1;
553 }
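/* Illustrative walk-through of mark_source_chains(): for a table like
 *
 *	INPUT rule 0:  jump to "foo"	(standard target, verdict >= 0)
 *	INPUT rule 1:  policy DROP	(unconditional, verdict < 0)
 *	foo   rule 0:  RETURN
 *
 * the loop follows the jump into "foo", marking each visited entry
 * with the INPUT hook bit in ->comefrom and stashing the previous
 * position in ->counters.pcnt as a back pointer, so the unconditional
 * RETURN can backtrack and continue at INPUT rule 1.  Revisiting an
 * entry whose (1 << NF_INET_NUMHOOKS) bit is still set means a cycle,
 * and the function returns 0, which translate_table() turns into
 * -ELOOP. */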
554
555 static int
556 cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i)
557 {
558         struct xt_mtdtor_param par;
559
560         if (i && (*i)-- == 0)
561                 return 1;
562
563         par.net       = net;
564         par.match     = m->u.kernel.match;
565         par.matchinfo = m->data;
566         par.family    = NFPROTO_IPV4;
567         if (par.match->destroy != NULL)
568                 par.match->destroy(&par);
569         module_put(par.match->me);
570         return 0;
571 }
572
573 static int
574 check_entry(struct ipt_entry *e, const char *name)
575 {
576         struct ipt_entry_target *t;
577
578         if (!ip_checkentry(&e->ip)) {
579                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
580                 return -EINVAL;
581         }
582
583         if (e->target_offset + sizeof(struct ipt_entry_target) >
584             e->next_offset)
585                 return -EINVAL;
586
587         t = ipt_get_target(e);
588         if (e->target_offset + t->u.target_size > e->next_offset)
589                 return -EINVAL;
590
591         return 0;
592 }
593
594 static int
595 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
596             unsigned int *i)
597 {
598         const struct ipt_ip *ip = par->entryinfo;
599         int ret;
600
601         par->match     = m->u.kernel.match;
602         par->matchinfo = m->data;
603
604         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
605               ip->proto, ip->invflags & IPT_INV_PROTO);
606         if (ret < 0) {
607                 duprintf("ip_tables: check failed for `%s'.\n",
608                          par->match->name);
609                 return ret;
610         }
611         ++*i;
612         return 0;
613 }
614
615 static int
616 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
617                  unsigned int *i)
618 {
619         struct xt_match *match;
620         int ret;
621
622         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
623                                                       m->u.user.revision),
624                                         "ipt_%s", m->u.user.name);
625         if (IS_ERR(match) || !match) {
626                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
627                 return match ? PTR_ERR(match) : -ENOENT;
628         }
629         m->u.kernel.match = match;
630
631         ret = check_match(m, par, i);
632         if (ret)
633                 goto err;
634
635         return 0;
636 err:
637         module_put(m->u.kernel.match->me);
638         return ret;
639 }
640
641 static int check_target(struct ipt_entry *e, const char *name)
642 {
643         struct ipt_entry_target *t = ipt_get_target(e);
644         struct xt_tgchk_param par = {
645                 .table     = name,
646                 .entryinfo = e,
647                 .target    = t->u.kernel.target,
648                 .targinfo  = t->data,
649                 .hook_mask = e->comefrom,
650                 .family    = NFPROTO_IPV4,
651         };
652         int ret;
653
654         ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
655               e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
656         if (ret < 0) {
657                 duprintf("ip_tables: check failed for `%s'.\n",
658                          t->u.kernel.target->name);
659                 return ret;
660         }
661         return 0;
662 }
663
664 static int
665 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
666                  unsigned int size, unsigned int *i)
667 {
668         struct ipt_entry_target *t;
669         struct xt_target *target;
670         int ret;
671         unsigned int j;
672         struct xt_mtchk_param mtpar;
673
674         ret = check_entry(e, name);
675         if (ret)
676                 return ret;
677
678         j = 0;
679         mtpar.net       = net;
680         mtpar.table     = name;
681         mtpar.entryinfo = &e->ip;
682         mtpar.hook_mask = e->comefrom;
683         mtpar.family    = NFPROTO_IPV4;
684         ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
685         if (ret != 0)
686                 goto cleanup_matches;
687
688         t = ipt_get_target(e);
689         target = try_then_request_module(xt_find_target(AF_INET,
690                                                         t->u.user.name,
691                                                         t->u.user.revision),
692                                          "ipt_%s", t->u.user.name);
693         if (IS_ERR(target) || !target) {
694                 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
695                 ret = target ? PTR_ERR(target) : -ENOENT;
696                 goto cleanup_matches;
697         }
698         t->u.kernel.target = target;
699
700         ret = check_target(e, name);
701         if (ret)
702                 goto err;
703
704         (*i)++;
705         return 0;
706  err:
707         module_put(t->u.kernel.target->me);
708  cleanup_matches:
709         IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
710         return ret;
711 }
712
713 static bool check_underflow(struct ipt_entry *e)
714 {
715         const struct ipt_entry_target *t;
716         unsigned int verdict;
717
718         if (!unconditional(&e->ip))
719                 return false;
720         t = ipt_get_target(e);
721         if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
722                 return false;
723         verdict = ((struct ipt_standard_target *)t)->verdict;
724         verdict = -verdict - 1;
725         return verdict == NF_DROP || verdict == NF_ACCEPT;
726 }
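/* Worked example of the verdict encoding checked above: standard
 * targets store built-in verdicts as -(verdict) - 1, so NF_DROP (0)
 * is kept as -1 and NF_ACCEPT (1) as -2, while jumps are non-negative
 * byte offsets.  Undoing the encoding with "-verdict - 1" therefore
 * yields 0 or 1 only for DROP/ACCEPT; a RETURN or a jump makes the
 * proposed underflow entry invalid.  ipt_do_table() uses the same
 * decoding when it hits a negative standard verdict. */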
727
728 static int
729 check_entry_size_and_hooks(struct ipt_entry *e,
730                            struct xt_table_info *newinfo,
731                            unsigned char *base,
732                            unsigned char *limit,
733                            const unsigned int *hook_entries,
734                            const unsigned int *underflows,
735                            unsigned int valid_hooks,
736                            unsigned int *i)
737 {
738         unsigned int h;
739
740         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
741             (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
742                 duprintf("Bad offset %p\n", e);
743                 return -EINVAL;
744         }
745
746         if (e->next_offset
747             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
748                 duprintf("checking: element %p size %u\n",
749                          e, e->next_offset);
750                 return -EINVAL;
751         }
752
753         /* Check hooks & underflows */
754         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
755                 if (!(valid_hooks & (1 << h)))
756                         continue;
757                 if ((unsigned char *)e - base == hook_entries[h])
758                         newinfo->hook_entry[h] = hook_entries[h];
759                 if ((unsigned char *)e - base == underflows[h]) {
760                         if (!check_underflow(e)) {
761                                 pr_err("Underflows must be unconditional and "
762                                        "use the STANDARD target with "
763                                        "ACCEPT/DROP\n");
764                                 return -EINVAL;
765                         }
766                         newinfo->underflow[h] = underflows[h];
767                 }
768         }
769
770         /* Clear counters and comefrom */
771         e->counters = ((struct xt_counters) { 0, 0 });
772         e->comefrom = 0;
773
774         (*i)++;
775         return 0;
776 }
777
778 static int
779 cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i)
780 {
781         struct xt_tgdtor_param par;
782         struct ipt_entry_target *t;
783
784         if (i && (*i)-- == 0)
785                 return 1;
786
787         /* Cleanup all matches */
788         IPT_MATCH_ITERATE(e, cleanup_match, net, NULL);
789         t = ipt_get_target(e);
790
791         par.target   = t->u.kernel.target;
792         par.targinfo = t->data;
793         par.family   = NFPROTO_IPV4;
794         if (par.target->destroy != NULL)
795                 par.target->destroy(&par);
796         module_put(par.target->me);
797         return 0;
798 }
799
800 /* Checks and translates the user-supplied table segment (held in
801    newinfo) */
802 static int
803 translate_table(struct net *net,
804                 const char *name,
805                 unsigned int valid_hooks,
806                 struct xt_table_info *newinfo,
807                 void *entry0,
808                 unsigned int size,
809                 unsigned int number,
810                 const unsigned int *hook_entries,
811                 const unsigned int *underflows)
812 {
813         unsigned int i;
814         int ret;
815
816         newinfo->size = size;
817         newinfo->number = number;
818
819         /* Init all hooks to impossible value. */
820         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
821                 newinfo->hook_entry[i] = 0xFFFFFFFF;
822                 newinfo->underflow[i] = 0xFFFFFFFF;
823         }
824
825         duprintf("translate_table: size %u\n", newinfo->size);
826         i = 0;
827         /* Walk through entries, checking offsets. */
828         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
829                                 check_entry_size_and_hooks,
830                                 newinfo,
831                                 entry0,
832                                 entry0 + size,
833                                 hook_entries, underflows, valid_hooks, &i);
834         if (ret != 0)
835                 return ret;
836
837         if (i != number) {
838                 duprintf("translate_table: %u not %u entries\n",
839                          i, number);
840                 return -EINVAL;
841         }
842
843         /* Check hooks all assigned */
844         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
845                 /* Only hooks which are valid */
846                 if (!(valid_hooks & (1 << i)))
847                         continue;
848                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
849                         duprintf("Invalid hook entry %u %u\n",
850                                  i, hook_entries[i]);
851                         return -EINVAL;
852                 }
853                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
854                         duprintf("Invalid underflow %u %u\n",
855                                  i, underflows[i]);
856                         return -EINVAL;
857                 }
858         }
859
860         if (!mark_source_chains(newinfo, valid_hooks, entry0))
861                 return -ELOOP;
862
863         /* Finally, each sanity check must pass */
864         i = 0;
865         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
866                                 find_check_entry, net, name, size, &i);
867
868         if (ret != 0) {
869                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
870                                 cleanup_entry, net, &i);
871                 return ret;
872         }
873
874         /* And one copy for every other CPU */
875         for_each_possible_cpu(i) {
876                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
877                         memcpy(newinfo->entries[i], entry0, newinfo->size);
878         }
879
880         return ret;
881 }
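/* Illustrative summary of the translate_table() pipeline above:
 *   1. check_entry_size_and_hooks() validates sizes/alignment and
 *      records the hook entry points and underflows,
 *   2. mark_source_chains() walks every chain from each hook to reject
 *      loops and bad verdicts,
 *   3. find_check_entry() resolves and checks every match and target,
 *      unwinding with cleanup_entry() on failure,
 *   4. the verified blob is copied to every other CPU's slot. */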
882
883 /* Gets counters. */
884 static inline int
885 add_entry_to_counter(const struct ipt_entry *e,
886                      struct xt_counters total[],
887                      unsigned int *i)
888 {
889         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
890
891         (*i)++;
892         return 0;
893 }
894
895 static inline int
896 set_entry_to_counter(const struct ipt_entry *e,
897                      struct ipt_counters total[],
898                      unsigned int *i)
899 {
900         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
901
902         (*i)++;
903         return 0;
904 }
905
906 static void
907 get_counters(const struct xt_table_info *t,
908              struct xt_counters counters[])
909 {
910         unsigned int cpu;
911         unsigned int i;
912         unsigned int curcpu;
913
914         /* Instead of clearing (by a previous call to memset())
915          * the counters and using adds, we set the counters
916          * with data used by 'current' CPU.
917          *
918          * Bottom half has to be disabled to prevent deadlock
919          * if new softirq were to run and call ipt_do_table
920          */
921         local_bh_disable();
922         curcpu = smp_processor_id();
923
924         i = 0;
925         IPT_ENTRY_ITERATE(t->entries[curcpu],
926                           t->size,
927                           set_entry_to_counter,
928                           counters,
929                           &i);
930
931         for_each_possible_cpu(cpu) {
932                 if (cpu == curcpu)
933                         continue;
934                 i = 0;
935                 xt_info_wrlock(cpu);
936                 IPT_ENTRY_ITERATE(t->entries[cpu],
937                                   t->size,
938                                   add_entry_to_counter,
939                                   counters,
940                                   &i);
941                 xt_info_wrunlock(cpu);
942         }
943         local_bh_enable();
944 }
945
946 static struct xt_counters * alloc_counters(struct xt_table *table)
947 {
948         unsigned int countersize;
949         struct xt_counters *counters;
950         struct xt_table_info *private = table->private;
951
952         /* We need atomic snapshot of counters: rest doesn't change
953            (other than comefrom, which userspace doesn't care
954            about). */
955         countersize = sizeof(struct xt_counters) * private->number;
956         counters = vmalloc_node(countersize, numa_node_id());
957
958         if (counters == NULL)
959                 return ERR_PTR(-ENOMEM);
960
961         get_counters(private, counters);
962
963         return counters;
964 }
965
966 static int
967 copy_entries_to_user(unsigned int total_size,
968                      struct xt_table *table,
969                      void __user *userptr)
970 {
971         unsigned int off, num;
972         struct ipt_entry *e;
973         struct xt_counters *counters;
974         const struct xt_table_info *private = table->private;
975         int ret = 0;
976         const void *loc_cpu_entry;
977
978         counters = alloc_counters(table);
979         if (IS_ERR(counters))
980                 return PTR_ERR(counters);
981
982         /* choose the copy that is on our node/cpu, ...
983          * This choice is lazy (because current thread is
984          * allowed to migrate to another cpu)
985          */
986         loc_cpu_entry = private->entries[raw_smp_processor_id()];
987         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
988                 ret = -EFAULT;
989                 goto free_counters;
990         }
991
992         /* FIXME: use iterator macros --RR */
993         /* ... then go back and fix counters and names */
994         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
995                 unsigned int i;
996                 const struct ipt_entry_match *m;
997                 const struct ipt_entry_target *t;
998
999                 e = (struct ipt_entry *)(loc_cpu_entry + off);
1000                 if (copy_to_user(userptr + off
1001                                  + offsetof(struct ipt_entry, counters),
1002                                  &counters[num],
1003                                  sizeof(counters[num])) != 0) {
1004                         ret = -EFAULT;
1005                         goto free_counters;
1006                 }
1007
1008                 for (i = sizeof(struct ipt_entry);
1009                      i < e->target_offset;
1010                      i += m->u.match_size) {
1011                         m = (void *)e + i;
1012
1013                         if (copy_to_user(userptr + off + i
1014                                          + offsetof(struct ipt_entry_match,
1015                                                     u.user.name),
1016                                          m->u.kernel.match->name,
1017                                          strlen(m->u.kernel.match->name)+1)
1018                             != 0) {
1019                                 ret = -EFAULT;
1020                                 goto free_counters;
1021                         }
1022                 }
1023
1024                 t = ipt_get_target(e);
1025                 if (copy_to_user(userptr + off + e->target_offset
1026                                  + offsetof(struct ipt_entry_target,
1027                                             u.user.name),
1028                                  t->u.kernel.target->name,
1029                                  strlen(t->u.kernel.target->name)+1) != 0) {
1030                         ret = -EFAULT;
1031                         goto free_counters;
1032                 }
1033         }
1034
1035  free_counters:
1036         vfree(counters);
1037         return ret;
1038 }
1039
1040 #ifdef CONFIG_COMPAT
1041 static void compat_standard_from_user(void *dst, void *src)
1042 {
1043         int v = *(compat_int_t *)src;
1044
1045         if (v > 0)
1046                 v += xt_compat_calc_jump(AF_INET, v);
1047         memcpy(dst, &v, sizeof(v));
1048 }
1049
1050 static int compat_standard_to_user(void __user *dst, void *src)
1051 {
1052         compat_int_t cv = *(int *)src;
1053
1054         if (cv > 0)
1055                 cv -= xt_compat_calc_jump(AF_INET, cv);
1056         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1057 }
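/* Illustrative note on the two helpers above: a native ipt_entry (and
 * its matches/targets) is larger than the 32-bit compat layout, and
 * jump verdicts are byte offsets into the table blob, so they shift
 * when the entries change size.  xt_compat_calc_jump() returns the
 * accumulated size difference registered via xt_compat_add_offset()
 * (see compat_calc_entry() below); it is added when converting a jump
 * from compat to native layout and subtracted on the way back. */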
1058
1059 static inline int
1060 compat_calc_match(struct ipt_entry_match *m, int *size)
1061 {
1062         *size += xt_compat_match_offset(m->u.kernel.match);
1063         return 0;
1064 }
1065
1066 static int compat_calc_entry(struct ipt_entry *e,
1067                              const struct xt_table_info *info,
1068                              void *base, struct xt_table_info *newinfo)
1069 {
1070         struct ipt_entry_target *t;
1071         unsigned int entry_offset;
1072         int off, i, ret;
1073
1074         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1075         entry_offset = (void *)e - base;
1076         IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1077         t = ipt_get_target(e);
1078         off += xt_compat_target_offset(t->u.kernel.target);
1079         newinfo->size -= off;
1080         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1081         if (ret)
1082                 return ret;
1083
1084         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1085                 if (info->hook_entry[i] &&
1086                     (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1087                         newinfo->hook_entry[i] -= off;
1088                 if (info->underflow[i] &&
1089                     (e < (struct ipt_entry *)(base + info->underflow[i])))
1090                         newinfo->underflow[i] -= off;
1091         }
1092         return 0;
1093 }
1094
1095 static int compat_table_info(const struct xt_table_info *info,
1096                              struct xt_table_info *newinfo)
1097 {
1098         void *loc_cpu_entry;
1099
1100         if (!newinfo || !info)
1101                 return -EINVAL;
1102
1103         /* we don't care about newinfo->entries[] */
1104         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1105         newinfo->initial_entries = 0;
1106         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1107         return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1108                                  compat_calc_entry, info, loc_cpu_entry,
1109                                  newinfo);
1110 }
1111 #endif
1112
1113 static int get_info(struct net *net, void __user *user, int *len, int compat)
1114 {
1115         char name[IPT_TABLE_MAXNAMELEN];
1116         struct xt_table *t;
1117         int ret;
1118
1119         if (*len != sizeof(struct ipt_getinfo)) {
1120                 duprintf("length %u != %zu\n", *len,
1121                          sizeof(struct ipt_getinfo));
1122                 return -EINVAL;
1123         }
1124
1125         if (copy_from_user(name, user, sizeof(name)) != 0)
1126                 return -EFAULT;
1127
1128         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1129 #ifdef CONFIG_COMPAT
1130         if (compat)
1131                 xt_compat_lock(AF_INET);
1132 #endif
1133         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1134                                     "iptable_%s", name);
1135         if (t && !IS_ERR(t)) {
1136                 struct ipt_getinfo info;
1137                 const struct xt_table_info *private = t->private;
1138
1139 #ifdef CONFIG_COMPAT
1140                 if (compat) {
1141                         struct xt_table_info tmp;
1142                         ret = compat_table_info(private, &tmp);
1143                         xt_compat_flush_offsets(AF_INET);
1144                         private = &tmp;
1145                 }
1146 #endif
1147                 info.valid_hooks = t->valid_hooks;
1148                 memcpy(info.hook_entry, private->hook_entry,
1149                        sizeof(info.hook_entry));
1150                 memcpy(info.underflow, private->underflow,
1151                        sizeof(info.underflow));
1152                 info.num_entries = private->number;
1153                 info.size = private->size;
1154                 strcpy(info.name, name);
1155
1156                 if (copy_to_user(user, &info, *len) != 0)
1157                         ret = -EFAULT;
1158                 else
1159                         ret = 0;
1160
1161                 xt_table_unlock(t);
1162                 module_put(t->me);
1163         } else
1164                 ret = t ? PTR_ERR(t) : -ENOENT;
1165 #ifdef CONFIG_COMPAT
1166         if (compat)
1167                 xt_compat_unlock(AF_INET);
1168 #endif
1169         return ret;
1170 }
1171
1172 static int
1173 get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1174 {
1175         int ret;
1176         struct ipt_get_entries get;
1177         struct xt_table *t;
1178
1179         if (*len < sizeof(get)) {
1180                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1181                 return -EINVAL;
1182         }
1183         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1184                 return -EFAULT;
1185         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1186                 duprintf("get_entries: %u != %zu\n",
1187                          *len, sizeof(get) + get.size);
1188                 return -EINVAL;
1189         }
1190
1191         t = xt_find_table_lock(net, AF_INET, get.name);
1192         if (t && !IS_ERR(t)) {
1193                 const struct xt_table_info *private = t->private;
1194                 duprintf("t->private->number = %u\n", private->number);
1195                 if (get.size == private->size)
1196                         ret = copy_entries_to_user(private->size,
1197                                                    t, uptr->entrytable);
1198                 else {
1199                         duprintf("get_entries: I've got %u not %u!\n",
1200                                  private->size, get.size);
1201                         ret = -EAGAIN;
1202                 }
1203                 module_put(t->me);
1204                 xt_table_unlock(t);
1205         } else
1206                 ret = t ? PTR_ERR(t) : -ENOENT;
1207
1208         return ret;
1209 }
1210
1211 static int
1212 __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1213              struct xt_table_info *newinfo, unsigned int num_counters,
1214              void __user *counters_ptr)
1215 {
1216         int ret;
1217         struct xt_table *t;
1218         struct xt_table_info *oldinfo;
1219         struct xt_counters *counters;
1220         void *loc_cpu_old_entry;
1221
1222         ret = 0;
1223         counters = vmalloc(num_counters * sizeof(struct xt_counters));
1224         if (!counters) {
1225                 ret = -ENOMEM;
1226                 goto out;
1227         }
1228
1229         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1230                                     "iptable_%s", name);
1231         if (!t || IS_ERR(t)) {
1232                 ret = t ? PTR_ERR(t) : -ENOENT;
1233                 goto free_newinfo_counters_untrans;
1234         }
1235
1236         /* You lied! */
1237         if (valid_hooks != t->valid_hooks) {
1238                 duprintf("Valid hook crap: %08X vs %08X\n",
1239                          valid_hooks, t->valid_hooks);
1240                 ret = -EINVAL;
1241                 goto put_module;
1242         }
1243
1244         oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1245         if (!oldinfo)
1246                 goto put_module;
1247
1248         /* Update module usage count based on number of rules */
1249         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1250                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1251         if ((oldinfo->number > oldinfo->initial_entries) ||
1252             (newinfo->number <= oldinfo->initial_entries))
1253                 module_put(t->me);
1254         if ((oldinfo->number > oldinfo->initial_entries) &&
1255             (newinfo->number <= oldinfo->initial_entries))
1256                 module_put(t->me);
1257
1258         /* Get the old counters, and synchronize with replace */
1259         get_counters(oldinfo, counters);
1260
1261         /* Decrease module usage counts and free resource */
1262         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1263         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1264                           net, NULL);
1265         xt_free_table_info(oldinfo);
1266         if (copy_to_user(counters_ptr, counters,
1267                          sizeof(struct xt_counters) * num_counters) != 0)
1268                 ret = -EFAULT;
1269         vfree(counters);
1270         xt_table_unlock(t);
1271         return ret;
1272
1273  put_module:
1274         module_put(t->me);
1275         xt_table_unlock(t);
1276  free_newinfo_counters_untrans:
1277         vfree(counters);
1278  out:
1279         return ret;
1280 }
1281
1282 static int
1283 do_replace(struct net *net, void __user *user, unsigned int len)
1284 {
1285         int ret;
1286         struct ipt_replace tmp;
1287         struct xt_table_info *newinfo;
1288         void *loc_cpu_entry;
1289
1290         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1291                 return -EFAULT;
1292
1293         /* overflow check */
1294         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1295                 return -ENOMEM;
1296
1297         newinfo = xt_alloc_table_info(tmp.size);
1298         if (!newinfo)
1299                 return -ENOMEM;
1300
1301         /* choose the copy that is on our node/cpu */
1302         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1303         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1304                            tmp.size) != 0) {
1305                 ret = -EFAULT;
1306                 goto free_newinfo;
1307         }
1308
1309         ret = translate_table(net, tmp.name, tmp.valid_hooks,
1310                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1311                               tmp.hook_entry, tmp.underflow);
1312         if (ret != 0)
1313                 goto free_newinfo;
1314
1315         duprintf("ip_tables: Translated table\n");
1316
1317         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1318                            tmp.num_counters, tmp.counters);
1319         if (ret)
1320                 goto free_newinfo_untrans;
1321         return 0;
1322
1323  free_newinfo_untrans:
1324         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
1325  free_newinfo:
1326         xt_free_table_info(newinfo);
1327         return ret;
1328 }
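/* Illustrative layout of the blob consumed by do_replace(), as implied
 * by the copy_from_user() calls above: a struct ipt_replace header
 * (name, valid_hooks, num_entries, size, hook_entry[], underflow[],
 * num_counters, counters pointer) followed immediately by "size" bytes
 * of ipt_entry rules.  __do_replace() later copies the old table's
 * counters back out through that counters pointer. */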
1329
1330 /* We're lazy, and add to the first CPU; overflow works its fey magic
1331  * and everything is OK. */
1332 static int
1333 add_counter_to_entry(struct ipt_entry *e,
1334                      const struct xt_counters addme[],
1335                      unsigned int *i)
1336 {
1337         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1338
1339         (*i)++;
1340         return 0;
1341 }
1342
1343 static int
1344 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1345 {
1346         unsigned int i, curcpu;
1347         struct xt_counters_info tmp;
1348         struct xt_counters *paddc;
1349         unsigned int num_counters;
1350         const char *name;
1351         int size;
1352         void *ptmp;
1353         struct xt_table *t;
1354         const struct xt_table_info *private;
1355         int ret = 0;
1356         void *loc_cpu_entry;
1357 #ifdef CONFIG_COMPAT
1358         struct compat_xt_counters_info compat_tmp;
1359
1360         if (compat) {
1361                 ptmp = &compat_tmp;
1362                 size = sizeof(struct compat_xt_counters_info);
1363         } else
1364 #endif
1365         {
1366                 ptmp = &tmp;
1367                 size = sizeof(struct xt_counters_info);
1368         }
1369
1370         if (copy_from_user(ptmp, user, size) != 0)
1371                 return -EFAULT;
1372
1373 #ifdef CONFIG_COMPAT
1374         if (compat) {
1375                 num_counters = compat_tmp.num_counters;
1376                 name = compat_tmp.name;
1377         } else
1378 #endif
1379         {
1380                 num_counters = tmp.num_counters;
1381                 name = tmp.name;
1382         }
1383
1384         if (len != size + num_counters * sizeof(struct xt_counters))
1385                 return -EINVAL;
1386
1387         paddc = vmalloc_node(len - size, numa_node_id());
1388         if (!paddc)
1389                 return -ENOMEM;
1390
1391         if (copy_from_user(paddc, user + size, len - size) != 0) {
1392                 ret = -EFAULT;
1393                 goto free;
1394         }
1395
1396         t = xt_find_table_lock(net, AF_INET, name);
1397         if (!t || IS_ERR(t)) {
1398                 ret = t ? PTR_ERR(t) : -ENOENT;
1399                 goto free;
1400         }
1401
1402         local_bh_disable();
1403         private = t->private;
1404         if (private->number != num_counters) {
1405                 ret = -EINVAL;
1406                 goto unlock_up_free;
1407         }
1408
1409         i = 0;
1410         /* Choose the copy that is on our node */
1411         curcpu = smp_processor_id();
1412         loc_cpu_entry = private->entries[curcpu];
1413         xt_info_wrlock(curcpu);
1414         IPT_ENTRY_ITERATE(loc_cpu_entry,
1415                           private->size,
1416                           add_counter_to_entry,
1417                           paddc,
1418                           &i);
1419         xt_info_wrunlock(curcpu);
1420  unlock_up_free:
1421         local_bh_enable();
1422         xt_table_unlock(t);
1423         module_put(t->me);
1424  free:
1425         vfree(paddc);
1426
1427         return ret;
1428 }
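/* Illustrative layout of the blob consumed by do_add_counters(): a
 * struct xt_counters_info header (table name and num_counters, or its
 * compat equivalent) followed by exactly num_counters struct
 * xt_counters entries, which is what the length check against
 * "size + num_counters * sizeof(struct xt_counters)" enforces. */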
1429
1430 #ifdef CONFIG_COMPAT
1431 struct compat_ipt_replace {
1432         char                    name[IPT_TABLE_MAXNAMELEN];
1433         u32                     valid_hooks;
1434         u32                     num_entries;
1435         u32                     size;
1436         u32                     hook_entry[NF_INET_NUMHOOKS];
1437         u32                     underflow[NF_INET_NUMHOOKS];
1438         u32                     num_counters;
1439         compat_uptr_t           counters;       /* struct ipt_counters * */
1440         struct compat_ipt_entry entries[0];
1441 };
1442
1443 static int
1444 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1445                           unsigned int *size, struct xt_counters *counters,
1446                           unsigned int *i)
1447 {
1448         struct ipt_entry_target *t;
1449         struct compat_ipt_entry __user *ce;
1450         u_int16_t target_offset, next_offset;
1451         compat_uint_t origsize;
1452         int ret;
1453
1454         ret = -EFAULT;
1455         origsize = *size;
1456         ce = (struct compat_ipt_entry __user *)*dstptr;
1457         if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
1458                 goto out;
1459
1460         if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1461                 goto out;
1462
1463         *dstptr += sizeof(struct compat_ipt_entry);
1464         *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1465
1466         ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1467         target_offset = e->target_offset - (origsize - *size);
1468         if (ret)
1469                 goto out;
1470         t = ipt_get_target(e);
1471         ret = xt_compat_target_to_user(t, dstptr, size);
1472         if (ret)
1473                 goto out;
1474         ret = -EFAULT;
1475         next_offset = e->next_offset - (origsize - *size);
1476         if (put_user(target_offset, &ce->target_offset))
1477                 goto out;
1478         if (put_user(next_offset, &ce->next_offset))
1479                 goto out;
1480
1481         (*i)++;
1482         return 0;
1483 out:
1484         return ret;
1485 }
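/*
 * Offset arithmetic above, with hypothetical numbers: (origsize - *size)
 * is the total number of bytes saved so far by converting to the compat
 * layout.  If the entry header shrinks by 4 bytes and the matches by
 * another 4, target_offset is written out 8 smaller than the native
 * value; after the target itself is converted (say another 4 bytes),
 * next_offset comes out 12 smaller.  The compat entry thus points at the
 * same logical match/target boundaries within its smaller layout.
 */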
1486
1487 static int
1488 compat_find_calc_match(struct ipt_entry_match *m,
1489                        const char *name,
1490                        const struct ipt_ip *ip,
1491                        unsigned int hookmask,
1492                        int *size, unsigned int *i)
1493 {
1494         struct xt_match *match;
1495
1496         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1497                                                       m->u.user.revision),
1498                                         "ipt_%s", m->u.user.name);
1499         if (IS_ERR(match) || !match) {
1500                 duprintf("compat_find_calc_match: `%s' not found\n",
1501                          m->u.user.name);
1502                 return match ? PTR_ERR(match) : -ENOENT;
1503         }
1504         m->u.kernel.match = match;
1505         *size += xt_compat_match_offset(match);
1506
1507         (*i)++;
1508         return 0;
1509 }
1510
1511 static int
1512 compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1513 {
1514         if (i && (*i)-- == 0)
1515                 return 1;
1516
1517         module_put(m->u.kernel.match->me);
1518         return 0;
1519 }
1520
1521 static int
1522 compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1523 {
1524         struct ipt_entry_target *t;
1525
1526         if (i && (*i)-- == 0)
1527                 return 1;
1528
1529         /* Cleanup all matches */
1530         COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1531         t = compat_ipt_get_target(e);
1532         module_put(t->u.kernel.target->me);
1533         return 0;
1534 }
1535
1536 static int
1537 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1538                                   struct xt_table_info *newinfo,
1539                                   unsigned int *size,
1540                                   unsigned char *base,
1541                                   unsigned char *limit,
1542                                   unsigned int *hook_entries,
1543                                   unsigned int *underflows,
1544                                   unsigned int *i,
1545                                   const char *name)
1546 {
1547         struct ipt_entry_target *t;
1548         struct xt_target *target;
1549         unsigned int entry_offset;
1550         unsigned int j;
1551         int ret, off, h;
1552
1553         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1554         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
1555             (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1556                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1557                 return -EINVAL;
1558         }
1559
1560         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1561                              sizeof(struct compat_xt_entry_target)) {
1562                 duprintf("checking: element %p size %u\n",
1563                          e, e->next_offset);
1564                 return -EINVAL;
1565         }
1566
1567         /* For the purposes of check_entry, casting the compat entry is fine */
1568         ret = check_entry((struct ipt_entry *)e, name);
1569         if (ret)
1570                 return ret;
1571
1572         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1573         entry_offset = (void *)e - (void *)base;
1574         j = 0;
1575         ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1576                                        &e->ip, e->comefrom, &off, &j);
1577         if (ret != 0)
1578                 goto release_matches;
1579
1580         t = compat_ipt_get_target(e);
1581         target = try_then_request_module(xt_find_target(AF_INET,
1582                                                         t->u.user.name,
1583                                                         t->u.user.revision),
1584                                          "ipt_%s", t->u.user.name);
1585         if (IS_ERR(target) || !target) {
1586                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1587                          t->u.user.name);
1588                 ret = target ? PTR_ERR(target) : -ENOENT;
1589                 goto release_matches;
1590         }
1591         t->u.kernel.target = target;
1592
1593         off += xt_compat_target_offset(target);
1594         *size += off;
1595         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1596         if (ret)
1597                 goto out;
1598
1599         /* Check hooks & underflows */
1600         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1601                 if ((unsigned char *)e - base == hook_entries[h])
1602                         newinfo->hook_entry[h] = hook_entries[h];
1603                 if ((unsigned char *)e - base == underflows[h])
1604                         newinfo->underflow[h] = underflows[h];
1605         }
1606
1607         /* Clear counters and comefrom */
1608         memset(&e->counters, 0, sizeof(e->counters));
1609         e->comefrom = 0;
1610
1611         (*i)++;
1612         return 0;
1613
1614 out:
1615         module_put(t->u.kernel.target->me);
1616 release_matches:
1617         IPT_MATCH_ITERATE(e, compat_release_match, &j);
1618         return ret;
1619 }
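/*
 * Bookkeeping summary: 'off' accumulates how much larger the native entry
 * will be than this compat one (header delta plus every match's and the
 * target's xt_compat_*_offset()).  It is added to *size so the caller can
 * size the native table, and xt_compat_add_offset() records the
 * (entry_offset, off) pair so compat-relative offsets can later be mapped
 * to native ones, roughly (sketch, assuming the usual x_tables helper):
 *
 *      native_off = compat_off + xt_compat_calc_jump(AF_INET, compat_off);
 */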
1620
1621 static int
1622 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1623                             unsigned int *size, const char *name,
1624                             struct xt_table_info *newinfo, unsigned char *base)
1625 {
1626         struct ipt_entry_target *t;
1627         struct xt_target *target;
1628         struct ipt_entry *de;
1629         unsigned int origsize;
1630         int ret, h;
1631
1632         ret = 0;
1633         origsize = *size;
1634         de = (struct ipt_entry *)*dstptr;
1635         memcpy(de, e, sizeof(struct ipt_entry));
1636         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1637
1638         *dstptr += sizeof(struct ipt_entry);
1639         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1640
1641         ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1642                                        dstptr, size);
1643         if (ret)
1644                 return ret;
1645         de->target_offset = e->target_offset - (origsize - *size);
1646         t = compat_ipt_get_target(e);
1647         target = t->u.kernel.target;
1648         xt_compat_target_from_user(t, dstptr, size);
1649
1650         de->next_offset = e->next_offset - (origsize - *size);
1651         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1652                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1653                         newinfo->hook_entry[h] -= origsize - *size;
1654                 if ((unsigned char *)de - base < newinfo->underflow[h])
1655                         newinfo->underflow[h] -= origsize - *size;
1656         }
1657         return ret;
1658 }
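/*
 * This is the inverse of compat_copy_entry_to_user(): *size grows as the
 * compat entry is expanded to the native layout, so (origsize - *size) is
 * negative and the subtractions above actually enlarge target_offset and
 * next_offset.  Likewise, every hook_entry/underflow that starts beyond
 * this entry is pushed forward by the amount the entry grew.
 */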
1659
1660 static int
1661 compat_check_entry(struct ipt_entry *e, struct net *net, const char *name,
1662                                      unsigned int *i)
1663 {
1664         struct xt_mtchk_param mtpar;
1665         unsigned int j;
1666         int ret;
1667
1668         j = 0;
1669         mtpar.net       = net;
1670         mtpar.table     = name;
1671         mtpar.entryinfo = &e->ip;
1672         mtpar.hook_mask = e->comefrom;
1673         mtpar.family    = NFPROTO_IPV4;
1674         ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
1675         if (ret)
1676                 goto cleanup_matches;
1677
1678         ret = check_target(e, name);
1679         if (ret)
1680                 goto cleanup_matches;
1681
1682         (*i)++;
1683         return 0;
1684
1685  cleanup_matches:
1686         IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
1687         return ret;
1688 }
1689
1690 static int
1691 translate_compat_table(struct net *net,
1692                        const char *name,
1693                        unsigned int valid_hooks,
1694                        struct xt_table_info **pinfo,
1695                        void **pentry0,
1696                        unsigned int total_size,
1697                        unsigned int number,
1698                        unsigned int *hook_entries,
1699                        unsigned int *underflows)
1700 {
1701         unsigned int i, j;
1702         struct xt_table_info *newinfo, *info;
1703         void *pos, *entry0, *entry1;
1704         unsigned int size;
1705         int ret;
1706
1707         info = *pinfo;
1708         entry0 = *pentry0;
1709         size = total_size;
1710         info->number = number;
1711
1712         /* Init all hooks to impossible value. */
1713         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1714                 info->hook_entry[i] = 0xFFFFFFFF;
1715                 info->underflow[i] = 0xFFFFFFFF;
1716         }
1717
1718         duprintf("translate_compat_table: size %u\n", info->size);
1719         j = 0;
1720         xt_compat_lock(AF_INET);
1721         /* Walk through entries, checking offsets. */
1722         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1723                                        check_compat_entry_size_and_hooks,
1724                                        info, &size, entry0,
1725                                        entry0 + total_size,
1726                                        hook_entries, underflows, &j, name);
1727         if (ret != 0)
1728                 goto out_unlock;
1729
1730         ret = -EINVAL;
1731         if (j != number) {
1732                 duprintf("translate_compat_table: %u not %u entries\n",
1733                          j, number);
1734                 goto out_unlock;
1735         }
1736
1737         /* Check hooks all assigned */
1738         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1739                 /* Only hooks which are valid */
1740                 if (!(valid_hooks & (1 << i)))
1741                         continue;
1742                 if (info->hook_entry[i] == 0xFFFFFFFF) {
1743                         duprintf("Invalid hook entry %u %u\n",
1744                                  i, hook_entries[i]);
1745                         goto out_unlock;
1746                 }
1747                 if (info->underflow[i] == 0xFFFFFFFF) {
1748                         duprintf("Invalid underflow %u %u\n",
1749                                  i, underflows[i]);
1750                         goto out_unlock;
1751                 }
1752         }
1753
1754         ret = -ENOMEM;
1755         newinfo = xt_alloc_table_info(size);
1756         if (!newinfo)
1757                 goto out_unlock;
1758
1759         newinfo->number = number;
1760         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1761                 newinfo->hook_entry[i] = info->hook_entry[i];
1762                 newinfo->underflow[i] = info->underflow[i];
1763         }
1764         entry1 = newinfo->entries[raw_smp_processor_id()];
1765         pos = entry1;
1766         size = total_size;
1767         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1768                                        compat_copy_entry_from_user,
1769                                        &pos, &size, name, newinfo, entry1);
1770         xt_compat_flush_offsets(AF_INET);
1771         xt_compat_unlock(AF_INET);
1772         if (ret)
1773                 goto free_newinfo;
1774
1775         ret = -ELOOP;
1776         if (!mark_source_chains(newinfo, valid_hooks, entry1))
1777                 goto free_newinfo;
1778
1779         i = 0;
1780         ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1781                                 net, name, &i);
1782         if (ret) {
1783                 j -= i;
1784                 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1785                                                   compat_release_entry, &j);
1786                 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i);
1787                 xt_free_table_info(newinfo);
1788                 return ret;
1789         }
1790
1791         /* And one copy for every other CPU */
1792         for_each_possible_cpu(i)
1793                 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1794                         memcpy(newinfo->entries[i], entry1, newinfo->size);
1795
1796         *pinfo = newinfo;
1797         *pentry0 = entry1;
1798         xt_free_table_info(info);
1799         return 0;
1800
1801 free_newinfo:
1802         xt_free_table_info(newinfo);
1803 out:
1804         COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1805         return ret;
1806 out_unlock:
1807         xt_compat_flush_offsets(AF_INET);
1808         xt_compat_unlock(AF_INET);
1809         goto out;
1810 }
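/*
 * Shape of the translation above: pass one walks the 32-bit blob
 * (check_compat_entry_size_and_hooks) purely to validate it and to learn
 * how large the native table must be; pass two expands every entry into a
 * freshly allocated native xt_table_info (compat_copy_entry_from_user),
 * after which the usual native checks run (mark_source_chains,
 * compat_check_entry) and the result is replicated to every possible CPU,
 * just as translate_table() does for native requests.
 */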
1811
1812 static int
1813 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1814 {
1815         int ret;
1816         struct compat_ipt_replace tmp;
1817         struct xt_table_info *newinfo;
1818         void *loc_cpu_entry;
1819
1820         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1821                 return -EFAULT;
1822
1823         /* overflow check */
1824         if (tmp.size >= INT_MAX / num_possible_cpus())
1825                 return -ENOMEM;
1826         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1827                 return -ENOMEM;
1828
1829         newinfo = xt_alloc_table_info(tmp.size);
1830         if (!newinfo)
1831                 return -ENOMEM;
1832
1833         /* choose the copy that is on our node/cpu */
1834         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1835         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1836                            tmp.size) != 0) {
1837                 ret = -EFAULT;
1838                 goto free_newinfo;
1839         }
1840
1841         ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
1842                                      &newinfo, &loc_cpu_entry, tmp.size,
1843                                      tmp.num_entries, tmp.hook_entry,
1844                                      tmp.underflow);
1845         if (ret != 0)
1846                 goto free_newinfo;
1847
1848         duprintf("compat_do_replace: Translated table\n");
1849
1850         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1851                            tmp.num_counters, compat_ptr(tmp.counters));
1852         if (ret)
1853                 goto free_newinfo_untrans;
1854         return 0;
1855
1856  free_newinfo_untrans:
1857         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
1858  free_newinfo:
1859         xt_free_table_info(newinfo);
1860         return ret;
1861 }
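/*
 * The two overflow checks above bound the later allocations: the ruleset
 * is replicated once per possible CPU, so roughly tmp.size times
 * num_possible_cpus() bytes are needed, and __do_replace() later
 * allocates tmp.num_counters * sizeof(struct xt_counters) bytes for the
 * old counters; rejecting values near INT_MAX / divisor keeps both
 * products from wrapping.
 */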
1862
1863 static int
1864 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1865                       unsigned int len)
1866 {
1867         int ret;
1868
1869         if (!capable(CAP_NET_ADMIN))
1870                 return -EPERM;
1871
1872         switch (cmd) {
1873         case IPT_SO_SET_REPLACE:
1874                 ret = compat_do_replace(sock_net(sk), user, len);
1875                 break;
1876
1877         case IPT_SO_SET_ADD_COUNTERS:
1878                 ret = do_add_counters(sock_net(sk), user, len, 1);
1879                 break;
1880
1881         default:
1882                 duprintf("compat_do_ipt_set_ctl: unknown request %i\n", cmd);
1883                 ret = -EINVAL;
1884         }
1885
1886         return ret;
1887 }
1888
1889 struct compat_ipt_get_entries {
1890         char name[IPT_TABLE_MAXNAMELEN];
1891         compat_uint_t size;
1892         struct compat_ipt_entry entrytable[0];
1893 };
1894
1895 static int
1896 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1897                             void __user *userptr)
1898 {
1899         struct xt_counters *counters;
1900         const struct xt_table_info *private = table->private;
1901         void __user *pos;
1902         unsigned int size;
1903         int ret = 0;
1904         const void *loc_cpu_entry;
1905         unsigned int i = 0;
1906
1907         counters = alloc_counters(table);
1908         if (IS_ERR(counters))
1909                 return PTR_ERR(counters);
1910
1911         /* choose the copy that is on our node/cpu; this choice is lazy
1912          * (the current thread may migrate to another cpu while we copy),
1913          * but any per-cpu copy of the ruleset is equally valid here.
1914          */
1915         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1916         pos = userptr;
1917         size = total_size;
1918         ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1919                                 compat_copy_entry_to_user,
1920                                 &pos, &size, counters, &i);
1921
1922         vfree(counters);
1923         return ret;
1924 }
1925
1926 static int
1927 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1928                    int *len)
1929 {
1930         int ret;
1931         struct compat_ipt_get_entries get;
1932         struct xt_table *t;
1933
1934         if (*len < sizeof(get)) {
1935                 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1936                 return -EINVAL;
1937         }
1938
1939         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1940                 return -EFAULT;
1941
1942         if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1943                 duprintf("compat_get_entries: %u != %zu\n",
1944                          *len, sizeof(get) + get.size);
1945                 return -EINVAL;
1946         }
1947
1948         xt_compat_lock(AF_INET);
1949         t = xt_find_table_lock(net, AF_INET, get.name);
1950         if (t && !IS_ERR(t)) {
1951                 const struct xt_table_info *private = t->private;
1952                 struct xt_table_info info;
1953                 duprintf("t->private->number = %u\n", private->number);
1954                 ret = compat_table_info(private, &info);
1955                 if (!ret && get.size == info.size) {
1956                         ret = compat_copy_entries_to_user(private->size,
1957                                                           t, uptr->entrytable);
1958                 } else if (!ret) {
1959                         duprintf("compat_get_entries: I've got %u not %u!\n",
1960                                  private->size, get.size);
1961                         ret = -EAGAIN;
1962                 }
1963                 xt_compat_flush_offsets(AF_INET);
1964                 module_put(t->me);
1965                 xt_table_unlock(t);
1966         } else
1967                 ret = t ? PTR_ERR(t) : -ENOENT;
1968
1969         xt_compat_unlock(AF_INET);
1970         return ret;
1971 }
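/*
 * Note on the -EAGAIN above: compat_table_info() recomputes the table's
 * size in compat terms; if that no longer matches what userspace asked
 * for (e.g. the table was replaced in the meantime), the caller is
 * expected to redo IPT_SO_GET_INFO and retry with the new size.
 */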
1972
1973 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1974
1975 static int
1976 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1977 {
1978         int ret;
1979
1980         if (!capable(CAP_NET_ADMIN))
1981                 return -EPERM;
1982
1983         switch (cmd) {
1984         case IPT_SO_GET_INFO:
1985                 ret = get_info(sock_net(sk), user, len, 1);
1986                 break;
1987         case IPT_SO_GET_ENTRIES:
1988                 ret = compat_get_entries(sock_net(sk), user, len);
1989                 break;
1990         default:
1991                 ret = do_ipt_get_ctl(sk, cmd, user, len);
1992         }
1993         return ret;
1994 }
1995 #endif
1996
1997 static int
1998 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1999 {
2000         int ret;
2001
2002         if (!capable(CAP_NET_ADMIN))
2003                 return -EPERM;
2004
2005         switch (cmd) {
2006         case IPT_SO_SET_REPLACE:
2007                 ret = do_replace(sock_net(sk), user, len);
2008                 break;
2009
2010         case IPT_SO_SET_ADD_COUNTERS:
2011                 ret = do_add_counters(sock_net(sk), user, len, 0);
2012                 break;
2013
2014         default:
2015                 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
2016                 ret = -EINVAL;
2017         }
2018
2019         return ret;
2020 }
2021
2022 static int
2023 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2024 {
2025         int ret;
2026
2027         if (!capable(CAP_NET_ADMIN))
2028                 return -EPERM;
2029
2030         switch (cmd) {
2031         case IPT_SO_GET_INFO:
2032                 ret = get_info(sock_net(sk), user, len, 0);
2033                 break;
2034
2035         case IPT_SO_GET_ENTRIES:
2036                 ret = get_entries(sock_net(sk), user, len);
2037                 break;
2038
2039         case IPT_SO_GET_REVISION_MATCH:
2040         case IPT_SO_GET_REVISION_TARGET: {
2041                 struct ipt_get_revision rev;
2042                 int target;
2043
2044                 if (*len != sizeof(rev)) {
2045                         ret = -EINVAL;
2046                         break;
2047                 }
2048                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
2049                         ret = -EFAULT;
2050                         break;
2051                 }
2052
2053                 if (cmd == IPT_SO_GET_REVISION_TARGET)
2054                         target = 1;
2055                 else
2056                         target = 0;
2057
2058                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
2059                                                          rev.revision,
2060                                                          target, &ret),
2061                                         "ipt_%s", rev.name);
2062                 break;
2063         }
2064
2065         default:
2066                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
2067                 ret = -EINVAL;
2068         }
2069
2070         return ret;
2071 }
2072
2073 struct xt_table *ipt_register_table(struct net *net,
2074                                     const struct xt_table *table,
2075                                     const struct ipt_replace *repl)
2076 {
2077         int ret;
2078         struct xt_table_info *newinfo;
2079         struct xt_table_info bootstrap
2080                 = { 0, 0, 0, { 0 }, { 0 }, { } };
2081         void *loc_cpu_entry;
2082         struct xt_table *new_table;
2083
2084         newinfo = xt_alloc_table_info(repl->size);
2085         if (!newinfo) {
2086                 ret = -ENOMEM;
2087                 goto out;
2088         }
2089
2090         /* choose the copy on our node/cpu, but don't care about preemption */
2091         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2092         memcpy(loc_cpu_entry, repl->entries, repl->size);
2093
2094         ret = translate_table(net, table->name, table->valid_hooks,
2095                               newinfo, loc_cpu_entry, repl->size,
2096                               repl->num_entries,
2097                               repl->hook_entry,
2098                               repl->underflow);
2099         if (ret != 0)
2100                 goto out_free;
2101
2102         new_table = xt_register_table(net, table, &bootstrap, newinfo);
2103         if (IS_ERR(new_table)) {
2104                 ret = PTR_ERR(new_table);
2105                 goto out_free;
2106         }
2107
2108         return new_table;
2109
2110 out_free:
2111         xt_free_table_info(newinfo);
2112 out:
2113         return ERR_PTR(ret);
2114 }
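/*
 * Typical caller, sketched after the in-tree table modules such as
 * iptable_filter (names and the construction of the initial ipt_replace
 * are illustrative, not copied from any one version):
 *
 *      static const struct xt_table packet_filter = {
 *              .name           = "filter",
 *              .valid_hooks    = FILTER_VALID_HOOKS,
 *              .me             = THIS_MODULE,
 *              .af             = NFPROTO_IPV4,
 *      };
 *
 *      static int __net_init iptable_filter_net_init(struct net *net)
 *      {
 *              // repl: an ipt_replace holding the empty initial ruleset
 *              net->ipv4.iptable_filter =
 *                      ipt_register_table(net, &packet_filter, repl);
 *              if (IS_ERR(net->ipv4.iptable_filter))
 *                      return PTR_ERR(net->ipv4.iptable_filter);
 *              return 0;
 *      }
 */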
2115
2116 void ipt_unregister_table(struct net *net, struct xt_table *table)
2117 {
2118         struct xt_table_info *private;
2119         void *loc_cpu_entry;
2120         struct module *table_owner = table->me;
2121
2122         private = xt_unregister_table(table);
2123
2124         /* Decrease module usage counts and free resources */
2125         loc_cpu_entry = private->entries[raw_smp_processor_id()];
2126         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL);
2127         if (private->number > private->initial_entries)
2128                 module_put(table_owner);
2129         xt_free_table_info(private);
2130 }
2131
2132 /* Returns true if the type and code are matched by the range, false otherwise */
2133 static inline bool
2134 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
2135                      u_int8_t type, u_int8_t code,
2136                      bool invert)
2137 {
2138         return ((test_type == 0xFF) ||
2139                 (type == test_type && code >= min_code && code <= max_code))
2140                 ^ invert;
2141 }
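/*
 * Example of the rule above: iptables' "--icmp-type echo-request" compiles
 * to test_type == 8 with a code range of 0..0xFF, so an incoming type 8 /
 * code 0 packet matches; test_type == 0xFF is the wildcard that matches
 * any ICMP packet; and 'invert' (IPT_ICMP_INV) flips the final result.
 */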
2142
2143 static bool
2144 icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2145 {
2146         const struct icmphdr *ic;
2147         struct icmphdr _icmph;
2148         const struct ipt_icmp *icmpinfo = par->matchinfo;
2149
2150         /* Must not be a fragment. */
2151         if (par->fragoff != 0)
2152                 return false;
2153
2154         ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
2155         if (ic == NULL) {
2156                 /* We've been asked to examine this packet, and we
2157                  * can't.  Hence, no choice but to drop.
2158                  */
2159                 duprintf("Dropping evil ICMP tinygram.\n");
2160                 *par->hotdrop = true;
2161                 return false;
2162         }
2163
2164         return icmp_type_code_match(icmpinfo->type,
2165                                     icmpinfo->code[0],
2166                                     icmpinfo->code[1],
2167                                     ic->type, ic->code,
2168                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
2169 }
2170
2171 static bool icmp_checkentry(const struct xt_mtchk_param *par)
2172 {
2173         const struct ipt_icmp *icmpinfo = par->matchinfo;
2174
2175         /* Must specify no unknown invflags */
2176         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2177 }
2178
2179 /* The built-in targets: standard (NULL) and error. */
2180 static struct xt_target ipt_standard_target __read_mostly = {
2181         .name           = IPT_STANDARD_TARGET,
2182         .targetsize     = sizeof(int),
2183         .family         = NFPROTO_IPV4,
2184 #ifdef CONFIG_COMPAT
2185         .compatsize     = sizeof(compat_int_t),
2186         .compat_from_user = compat_standard_from_user,
2187         .compat_to_user = compat_standard_to_user,
2188 #endif
2189 };
2190
2191 static struct xt_target ipt_error_target __read_mostly = {
2192         .name           = IPT_ERROR_TARGET,
2193         .target         = ipt_error,
2194         .targetsize     = IPT_FUNCTION_MAXNAMELEN,
2195         .family         = NFPROTO_IPV4,
2196 };
2197
2198 static struct nf_sockopt_ops ipt_sockopts = {
2199         .pf             = PF_INET,
2200         .set_optmin     = IPT_BASE_CTL,
2201         .set_optmax     = IPT_SO_SET_MAX+1,
2202         .set            = do_ipt_set_ctl,
2203 #ifdef CONFIG_COMPAT
2204         .compat_set     = compat_do_ipt_set_ctl,
2205 #endif
2206         .get_optmin     = IPT_BASE_CTL,
2207         .get_optmax     = IPT_SO_GET_MAX+1,
2208         .get            = do_ipt_get_ctl,
2209 #ifdef CONFIG_COMPAT
2210         .compat_get     = compat_do_ipt_get_ctl,
2211 #endif
2212         .owner          = THIS_MODULE,
2213 };
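/*
 * These handlers are reached through ordinary {get,set}sockopt() calls on
 * an IPv4 socket by a CAP_NET_ADMIN process.  A minimal userspace sketch
 * (assuming the usual struct ipt_getinfo from
 * <linux/netfilter_ipv4/ip_tables.h>):
 *
 *      struct ipt_getinfo info;
 *      socklen_t len = sizeof(info);
 *      int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *
 *      memset(&info, 0, sizeof(info));
 *      strncpy(info.name, "filter", sizeof(info.name) - 1);
 *      getsockopt(s, IPPROTO_IP, IPT_SO_GET_INFO, &info, &len);
 *      // info.size and info.num_entries now size the buffer for a
 *      // follow-up IPT_SO_GET_ENTRIES call
 */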
2214
2215 static struct xt_match icmp_matchstruct __read_mostly = {
2216         .name           = "icmp",
2217         .match          = icmp_match,
2218         .matchsize      = sizeof(struct ipt_icmp),
2219         .checkentry     = icmp_checkentry,
2220         .proto          = IPPROTO_ICMP,
2221         .family         = NFPROTO_IPV4,
2222 };
2223
2224 static int __net_init ip_tables_net_init(struct net *net)
2225 {
2226         return xt_proto_init(net, NFPROTO_IPV4);
2227 }
2228
2229 static void __net_exit ip_tables_net_exit(struct net *net)
2230 {
2231         xt_proto_fini(net, NFPROTO_IPV4);
2232 }
2233
2234 static struct pernet_operations ip_tables_net_ops = {
2235         .init = ip_tables_net_init,
2236         .exit = ip_tables_net_exit,
2237 };
2238
2239 static int __init ip_tables_init(void)
2240 {
2241         int ret;
2242
2243         ret = register_pernet_subsys(&ip_tables_net_ops);
2244         if (ret < 0)
2245                 goto err1;
2246
2247         /* No one else will be downing the sem now, so we won't sleep */
2248         ret = xt_register_target(&ipt_standard_target);
2249         if (ret < 0)
2250                 goto err2;
2251         ret = xt_register_target(&ipt_error_target);
2252         if (ret < 0)
2253                 goto err3;
2254         ret = xt_register_match(&icmp_matchstruct);
2255         if (ret < 0)
2256                 goto err4;
2257
2258         /* Register setsockopt */
2259         ret = nf_register_sockopt(&ipt_sockopts);
2260         if (ret < 0)
2261                 goto err5;
2262
2263         printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
2264         return 0;
2265
2266 err5:
2267         xt_unregister_match(&icmp_matchstruct);
2268 err4:
2269         xt_unregister_target(&ipt_error_target);
2270 err3:
2271         xt_unregister_target(&ipt_standard_target);
2272 err2:
2273         unregister_pernet_subsys(&ip_tables_net_ops);
2274 err1:
2275         return ret;
2276 }
2277
2278 static void __exit ip_tables_fini(void)
2279 {
2280         nf_unregister_sockopt(&ipt_sockopts);
2281
2282         xt_unregister_match(&icmp_matchstruct);
2283         xt_unregister_target(&ipt_error_target);
2284         xt_unregister_target(&ipt_standard_target);
2285
2286         unregister_pernet_subsys(&ip_tables_net_ops);
2287 }
2288
2289 EXPORT_SYMBOL(ipt_register_table);
2290 EXPORT_SYMBOL(ipt_unregister_table);
2291 EXPORT_SYMBOL(ipt_do_table);
2292 module_init(ip_tables_init);
2293 module_exit(ip_tables_fini);