/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 * 	- increase module usage count as soon as we have rules inside
 * 	  a table
 */
#include <linux/config.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter_ipv4/ip_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");

/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
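/* SMP_ALIGN() below rounds a size up to the next multiple of the CPU
 * cache line size: e.g. with SMP_CACHE_BYTES == 64, SMP_ALIGN(65) is
 * 128 while SMP_ALIGN(64) stays 64.  The mask trick relies on
 * SMP_CACHE_BYTES being a power of two. */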
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))

static DECLARE_MUTEX(ipt_mutex);

/* Must have mutex */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#include <linux/netfilter_ipv4/listhelp.h>

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif

/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   Hence the start of any table is given by get_table() below.  */

/* The table itself */
struct ipt_table_info
{
	/* Size per table */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	unsigned int initial_entries;

	/* Entry points and underflows */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];

	/* ipt_entry tables: one per CPU */
	void *entries[NR_CPUS];
};

static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

#if 0
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif

/* Returns whether the packet matches the rule or not. */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

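/* FWINV(cond, flag) implements iptables' "!" inversion: it XORs the raw
 * comparison with whether the inversion flag is set, so e.g. with
 * IPT_INV_SRCIP a source-address mismatch counts as a match, and vice
 * versa. */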
#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}

static inline int
ip_checkentry(const struct ipt_ip *ip)
{
	if (ip->flags & ~IPT_F_MASK) {
		duprintf("Unknown flag bits set: %08X\n",
			 ip->flags & ~IPT_F_MASK);
		return 0;
	}
	if (ip->invflags & ~IPT_INV_MASK) {
		duprintf("Unknown invflag bits set: %08X\n",
			 ip->invflags & ~IPT_INV_MASK);
		return 0;
	}
	return 1;
}

static unsigned int
ipt_error(struct sk_buff **pskb,
	  const struct net_device *in,
	  const struct net_device *out,
	  unsigned int hooknum,
	  const void *targinfo,
	  void *userinfo)
{
	if (net_ratelimit())
		printk("ip_tables: error: `%s'\n", (char *)targinfo);

	return NF_DROP;
}

static inline
int do_match(struct ipt_entry_match *m,
	     const struct sk_buff *skb,
	     const struct net_device *in,
	     const struct net_device *out,
	     int offset,
	     int *hotdrop)
{
	/* Stop iteration if it doesn't match */
	if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
		return 1;
	else
		return 0;
}

static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}

/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * about things we don't know (ie. the TCP syn flag or ports).
	 * If the rule is also a fragment-specific rule, non-fragments
	 * won't match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	table_base = (void *)table->private->entries[smp_processor_id()];
	e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Check that no one else is using our table */
	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
		       smp_processor_id(),
		       table->name,
		       &((struct ipt_entry *)table_base)->comefrom,
		       ((struct ipt_entry *)table_base)->comefrom);
	}
	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

	/* For return from builtin chain */
	back = get_entry(table_base, table->private->underflow[hook]);
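	/* The traversal keeps its return "stack" threaded through the
	 * table itself: on a jump into a user chain, the current back
	 * offset is saved in the comefrom field of the entry after the
	 * jump and `back' advances to that entry; IPT_RETURN pops by
	 * reloading back from back->comefrom. */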

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
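				/* v >= 0 is a jump offset into this
				 * table; v < 0 is either IPT_RETURN or
				 * an absolute verdict V encoded as
				 * -V - 1, which the decode below
				 * reverses. */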
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						verdict = (unsigned)(-v) - 1;
						break;
					}
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v != (void *)e + e->next_offset
				    && !(e->ip.flags & IPT_F_GOTO)) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     in, out,
								     hook,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif
}

/*
 * These are weird, but module loading must not be done with the mutex
 * held (since they will register), and we have to have a single
 * function to use try_then_request_module().
 */

/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
static inline struct ipt_table *find_table_lock(const char *name)
{
	struct ipt_table *t;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(t, &ipt_tables, list)
		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
			return t;
	up(&ipt_mutex);
	return NULL;
}

/* Find match, grabs ref.  Returns ERR_PTR() on error. */
static inline struct ipt_match *find_match(const char *name, u8 revision)
{
	struct ipt_match *m;
	int err = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(m, &ipt_match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision == revision) {
				if (try_module_get(m->me)) {
					up(&ipt_mutex);
					return m;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	up(&ipt_mutex);
	return ERR_PTR(err);
}

/* Find target, grabs ref.  Returns ERR_PTR() on error. */
static inline struct ipt_target *find_target(const char *name, u8 revision)
{
	struct ipt_target *t;
	int err = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(t, &ipt_target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision == revision) {
				if (try_module_get(t->me)) {
					up(&ipt_mutex);
					return t;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	up(&ipt_mutex);
	return ERR_PTR(err);
}

struct ipt_target *ipt_find_target(const char *name, u8 revision)
{
	struct ipt_target *target;

	target = try_then_request_module(find_target(name, revision),
					 "ipt_%s", name);
	if (IS_ERR(target) || !target)
		return NULL;
	return target;
}

static int match_revfn(const char *name, u8 revision, int *bestp)
{
	struct ipt_match *m;
	int have_rev = 0;

	list_for_each_entry(m, &ipt_match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision > *bestp)
				*bestp = m->revision;
			if (m->revision == revision)
				have_rev = 1;
		}
	}
	return have_rev;
}

static int target_revfn(const char *name, u8 revision, int *bestp)
{
	struct ipt_target *t;
	int have_rev = 0;

	list_for_each_entry(t, &ipt_target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision > *bestp)
				*bestp = t->revision;
			if (t->revision == revision)
				have_rev = 1;
		}
	}
	return have_rev;
}

/* Returns true or false (if no such extension at all) */
static inline int find_revision(const char *name, u8 revision,
				int (*revfn)(const char *, u8, int *),
				int *err)
{
	int have_rev, best = -1;

	if (down_interruptible(&ipt_mutex) != 0) {
		*err = -EINTR;
		return 1;
	}
	have_rev = revfn(name, revision, &best);
	up(&ipt_mutex);

	/* Nothing at all?  Return 0 to try loading module. */
	if (best == -1) {
		*err = -ENOENT;
		return 0;
	}

	*err = best;
	if (!have_rev)
		*err = -EPROTONOSUPPORT;
	return 1;
}


/* All zeroes == unconditional rule. */
static inline int
unconditional(const struct ipt_ip *ip)
{
	unsigned int i;

	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
		if (((__u32 *)ip)[i])
			return 0;

	return 1;
}

/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(struct ipt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(entry0 + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
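			/* comefrom now carries one bit per hook that
			 * can reach this rule, plus bit NF_IP_NUMHOOKS
			 * as an "on the current walk" marker; seeing
			 * that marker again (the test above) means a
			 * loop.  It is toggled off again while
			 * backtracking below. */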

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(entry0 + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(entry0 + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This is a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(entry0 + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
	next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}

static inline int
cleanup_match(struct ipt_entry_match *m, unsigned int *i)
{
	if (i && (*i)-- == 0)
		return 1;

	if (m->u.kernel.match->destroy)
		m->u.kernel.match->destroy(m->data,
					   m->u.match_size - sizeof(*m));
	module_put(m->u.kernel.match->me);
	return 0;
}

static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}

static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	struct ipt_match *match;

	match = try_then_request_module(find_match(m->u.user.name,
						   m->u.user.revision),
					"ipt_%s", m->u.user.name);
	if (IS_ERR(match) || !match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return match ? PTR_ERR(match) : -ENOENT;
	}
	m->u.kernel.match = match;

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		module_put(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}

static struct ipt_target ipt_standard_target;

static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(find_target(t->u.user.name,
						     t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	if (t->u.kernel.target == &ipt_standard_target) {
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		module_put(t->u.kernel.target->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto cleanup_matches;
	}

	(*i)++;
	return 0;

 cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}

static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct ipt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct ipt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}

static inline int
cleanup_entry(struct ipt_entry *e, unsigned int *i)
{
	struct ipt_entry_target *t;

	if (i && (*i)-- == 0)
		return 1;

	/* Cleanup all matches */
	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
	t = ipt_get_target(e);
	if (t->u.kernel.target->destroy)
		t->u.kernel.target->destroy(t->data,
					    t->u.target_size - sizeof(*t));
	module_put(t->u.kernel.target->me);
	return 0;
}

/* Checks and translates the user-supplied table segment (held in
   newinfo) */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		void *entry0,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				entry0,
				entry0 + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	if (!mark_source_chains(newinfo, valid_hooks, entry0))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		IPT_ENTRY_ITERATE(entry0, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_cpu(i) {
		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
			memcpy(newinfo->entries[i], entry0, newinfo->size);
	}

	return ret;
}

static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *oldinfo;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		int cpu;

		for_each_cpu(cpu) {
			struct ipt_entry *table_base = newinfo->entries[cpu];
			if (table_base)
				table_base->comefrom = 0xdead57ac;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);
	/* Check inside lock: is the old number correct? */
	if (num_counters != table->private->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, table->private->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}
	oldinfo = table->private;
	table->private = newinfo;
	newinfo->initial_entries = oldinfo->initial_entries;
	write_unlock_bh(&table->lock);

	return oldinfo;
}

/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
		     struct ipt_counters total[],
		     unsigned int *i)
{
	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static inline int
set_entry_to_counter(const struct ipt_entry *e,
		     struct ipt_counters total[],
		     unsigned int *i)
{
	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static void
get_counters(const struct ipt_table_info *t,
	     struct ipt_counters counters[])
{
	unsigned int cpu;
	unsigned int i;
	unsigned int curcpu;

	/* Instead of clearing (by a previous call to memset())
	 * the counters and using adds, we set the counters
	 * with the data from the 'current' CPU.
	 * We don't care about preemption here.
	 */
	curcpu = raw_smp_processor_id();

	i = 0;
	IPT_ENTRY_ITERATE(t->entries[curcpu],
			  t->size,
			  set_entry_to_counter,
			  counters,
			  &i);

	for_each_cpu(cpu) {
		if (cpu == curcpu)
			continue;
		i = 0;
		IPT_ENTRY_ITERATE(t->entries[cpu],
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
	}
}

static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void __user *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct ipt_counters *counters;
	int ret = 0;
	void *loc_cpu_entry;

	/* We need an atomic snapshot of counters: the rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct ipt_counters) * table->private->number;
	counters = vmalloc_node(countersize, numa_node_id());

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	write_lock_bh(&table->lock);
	get_counters(table->private, counters);
	write_unlock_bh(&table->lock);

	/* choose the copy that is on our node/cpu, ...
	 * This choice is lazy (because the current thread is
	 * allowed to migrate to another cpu)
	 */
	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
	/* ... then copy entire thing ... */
	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(loc_cpu_entry + off);
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}

static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries __user *uptr)
{
	int ret;
	struct ipt_table *t;

	t = find_table_lock(entries->name);
	if (t && !IS_ERR(t)) {
		duprintf("t->private->number = %u\n",
			 t->private->number);
		if (entries->size == t->private->size)
			ret = copy_entries_to_user(t->private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 t->private->size,
				 entries->size);
			ret = -EINVAL;
		}
		module_put(t->me);
		up(&ipt_mutex);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}

static void free_table_info(struct ipt_table_info *info)
{
	int cpu;
	for_each_cpu(cpu) {
		if (info->size <= PAGE_SIZE)
			kfree(info->entries[cpu]);
		else
			vfree(info->entries[cpu]);
	}
	kfree(info);
}

static struct ipt_table_info *alloc_table_info(unsigned int size)
{
	struct ipt_table_info *newinfo;
	int cpu;

	newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
	if (!newinfo)
		return NULL;

	newinfo->size = size;

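	/* Rule blobs that fit in a single page come from kmalloc_node()
	 * (physically contiguous, node-local); larger tables fall back
	 * to vmalloc_node().  free_table_info() above applies the same
	 * size test when freeing. */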
	for_each_cpu(cpu) {
		if (size <= PAGE_SIZE)
			newinfo->entries[cpu] = kmalloc_node(size,
							     GFP_KERNEL,
							     cpu_to_node(cpu));
		else
			newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));
		if (newinfo->entries[cpu] == NULL) {
			free_table_info(newinfo);
			return NULL;
		}
	}

	return newinfo;
}

static int
do_replace(void __user *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct ipt_table_info *newinfo, *oldinfo;
	struct ipt_counters *counters;
	void *loc_cpu_entry, *loc_cpu_old_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
		return -ENOMEM;

	newinfo = alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy that is on our node/cpu */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}

	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	t = try_then_request_module(find_table_lock(tmp.name),
				    "iptable_%s", tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
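	/* Taken together, the two conditional puts above fold the
	 * reference taken by find_table_lock() into the "rules pin"
	 * noted in the header comment: afterwards the table module
	 * holds one extra reference exactly when the new table carries
	 * more than its initial entries. */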

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, NULL);
	free_table_info(oldinfo);
	if (copy_to_user(tmp.counters, counters,
			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
		ret = -EFAULT;
	vfree(counters);
	up(&ipt_mutex);
	return ret;

 put_module:
	module_put(t->me);
	up(&ipt_mutex);
 free_newinfo_counters_untrans:
	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	free_table_info(newinfo);
	return ret;
}

/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK. */
static inline int
add_counter_to_entry(struct ipt_entry *e,
		     const struct ipt_counters addme[],
		     unsigned int *i)
{
#if 0
	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
		 *i,
		 (long unsigned int)e->counters.pcnt,
		 (long unsigned int)e->counters.bcnt,
		 (long unsigned int)addme[*i].pcnt,
		 (long unsigned int)addme[*i].bcnt);
#endif

	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

	(*i)++;
	return 0;
}

static int
do_add_counters(void __user *user, unsigned int len)
{
	unsigned int i;
	struct ipt_counters_info tmp, *paddc;
	struct ipt_table *t;
	int ret = 0;
	void *loc_cpu_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
		return -EINVAL;

	paddc = vmalloc_node(len, numa_node_id());
	if (!paddc)
		return -ENOMEM;

	if (copy_from_user(paddc, user, len) != 0) {
		ret = -EFAULT;
		goto free;
	}

	t = find_table_lock(tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free;
	}

	write_lock_bh(&t->lock);
	if (t->private->number != paddc->num_counters) {
		ret = -EINVAL;
		goto unlock_up_free;
	}

	i = 0;
	/* Choose the copy that is on our node */
	loc_cpu_entry = t->private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry,
			  t->private->size,
			  add_counter_to_entry,
			  paddc->counters,
			  &i);
 unlock_up_free:
	write_unlock_bh(&t->lock);
	up(&ipt_mutex);
	module_put(t->me);
 free:
	vfree(paddc);

	return ret;
}

static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_SET_REPLACE:
		ret = do_replace(user, len);
		break;

	case IPT_SO_SET_ADD_COUNTERS:
		ret = do_add_counters(user, len);
		break;

	default:
		duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';

		t = try_then_request_module(find_table_lock(name),
					    "iptable_%s", name);
		if (t && !IS_ERR(t)) {
			struct ipt_getinfo info;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, t->private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, t->private->underflow,
			       sizeof(info.underflow));
			info.num_entries = t->private->number;
			info.size = t->private->size;
			memcpy(info.name, name, sizeof(info.name));

			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;
			up(&ipt_mutex);
			module_put(t->me);
		} else
			ret = t ? PTR_ERR(t) : -ENOENT;
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		struct ipt_get_entries get;

		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		int (*revfn)(const char *, u8, int *);

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			revfn = target_revfn;
		else
			revfn = match_revfn;

		try_then_request_module(find_revision(rev.name, rev.revision,
						      revfn, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

/* Registration hooks for targets. */
int
ipt_register_target(struct ipt_target *target)
{
	int ret;

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0)
		return ret;
	list_add(&target->list, &ipt_target);
	up(&ipt_mutex);
	return ret;
}

void
ipt_unregister_target(struct ipt_target *target)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_target, target);
	up(&ipt_mutex);
}

int
ipt_register_match(struct ipt_match *match)
{
	int ret;

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0)
		return ret;

	list_add(&match->list, &ipt_match);
	up(&ipt_mutex);

	return ret;
}

void
ipt_unregister_match(struct ipt_match *match)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_match, match);
	up(&ipt_mutex);
}

int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct ipt_table_info *newinfo;
	static struct ipt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
	void *loc_cpu_entry;

	newinfo = alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy on our node/cpu,
	 * but don't care about preemption
	 */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, loc_cpu_entry, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		free_table_info(newinfo);
		return ret;
	}

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0) {
		free_table_info(newinfo);
		return ret;
	}

	/* Don't autoload: we'd eat our tail... */
	if (list_named_find(&ipt_tables, table->name)) {
		ret = -EEXIST;
		goto free_unlock;
	}

	/* Simplifies replace_table code. */
	table->private = &bootstrap;
	if (!replace_table(table, 0, newinfo, &ret))
		goto free_unlock;

	duprintf("table->private->number = %u\n",
		 table->private->number);

	/* save number of initial entries */
	table->private->initial_entries = table->private->number;

	rwlock_init(&table->lock);
	list_prepend(&ipt_tables, table);

 unlock:
	up(&ipt_mutex);
	return ret;

 free_unlock:
	free_table_info(newinfo);
	goto unlock;
}

void ipt_unregister_table(struct ipt_table *table)
{
	void *loc_cpu_entry;

	down(&ipt_mutex);
	LIST_DELETE(&ipt_tables, table);
	up(&ipt_mutex);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
			  cleanup_entry, NULL);
	free_table_info(table->private);
}

/* Returns 1 if the port is matched by the range, 0 otherwise */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int ret;

	ret = (port >= min && port <= max) ^ invert;
	return ret;
}

static int
tcp_find_option(u_int8_t option,
		const struct sk_buff *skb,
		unsigned int optlen,
		int invert,
		int *hotdrop)
{
	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
	unsigned int i;

	duprintf("tcp_match: finding option\n");

	if (!optlen)
		return invert;

	/* If we don't have the whole header, drop packet. */
	op = skb_header_pointer(skb,
				skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
				optlen, _opt);
	if (op == NULL) {
		*hotdrop = 1;
		return 0;
	}

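	/* Walk the options in TCP kind/length format: kinds 0 (EOL) and
	 * 1 (NOP) are single bytes, every other option carries a length
	 * byte at op[i+1].  The `?: 1' guards against a bogus zero
	 * length that would otherwise spin forever. */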
	for (i = 0; i < optlen; ) {
		if (op[i] == option) return !invert;
		if (op[i] < 2) i++;
		else i += op[i+1]?:1;
	}

	return invert;
}

static int
tcp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct tcphdr _tcph, *th;
	const struct ipt_tcp *tcpinfo = matchinfo;

	if (offset) {
		/* To quote Alan:

		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
		   causes this. It's a cracker trying to break in by doing a
		   flag overwrite to pass the direction checks.
		*/
		if (offset == 1) {
			duprintf("Dropping evil TCP offset=1 frag.\n");
			*hotdrop = 1;
		}
		/* Must not be a fragment. */
		return 0;
	}

#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))

	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_tcph), &_tcph);
	if (th == NULL) {
		/* We've been asked to examine this packet, and we
		   can't.  Hence, no choice but to drop. */
		duprintf("Dropping evil TCP offset=0 tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
			ntohs(th->source),
			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
		return 0;
	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
			ntohs(th->dest),
			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
		return 0;
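	/* Byte 13 of the TCP header is the flags byte (FIN, SYN, RST,
	 * PSH, ACK, URG, ECE, CWR); the mask-and-compare below
	 * implements the --tcp-flags semantics. */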
	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
		      == tcpinfo->flg_cmp,
		      IPT_TCP_INV_FLAGS))
		return 0;
	if (tcpinfo->option) {
		if (th->doff * 4 < sizeof(_tcph)) {
			*hotdrop = 1;
			return 0;
		}
		if (!tcp_find_option(tcpinfo->option, skb,
				     th->doff*4 - sizeof(_tcph),
				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
				     hotdrop))
			return 0;
	}
	return 1;
}

/* Called when user tries to insert an entry of this type. */
static int
tcp_checkentry(const char *tablename,
	       const struct ipt_ip *ip,
	       void *matchinfo,
	       unsigned int matchsize,
	       unsigned int hook_mask)
{
	const struct ipt_tcp *tcpinfo = matchinfo;

	/* Must specify proto == TCP, and no unknown invflags */
	return ip->proto == IPPROTO_TCP
		&& !(ip->invflags & IPT_INV_PROTO)
		&& matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
		&& !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
}

static int
udp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct udphdr _udph, *uh;
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_udph), &_udph);
	if (uh == NULL) {
		/* We've been asked to examine this packet, and we
		   can't.  Hence, no choice but to drop. */
		duprintf("Dropping evil UDP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	return port_match(udpinfo->spts[0], udpinfo->spts[1],
			  ntohs(uh->source),
			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
			      ntohs(uh->dest),
			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
}

/* Called when user tries to insert an entry of this type. */
static int
udp_checkentry(const char *tablename,
	       const struct ipt_ip *ip,
	       void *matchinfo,
	       unsigned int matchinfosize,
	       unsigned int hook_mask)
{
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must specify proto == UDP, and no unknown invflags */
	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
			 IPPROTO_UDP);
		return 0;
	}
	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
		duprintf("ipt_udp: matchsize %u != %u\n",
			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
		return 0;
	}
	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
		duprintf("ipt_udp: unknown flags %X\n",
			 udpinfo->invflags);
		return 0;
	}

	return 1;
}

/* Returns 1 if the type and code are matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
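	/* A test_type of 0xFF acts as a wildcard: the range check is
	 * skipped and any type/code matches (modulo inversion). */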
	return ((test_type == 0xFF)
		|| (type == test_type && code >= min_code && code <= max_code))
		^ invert;
}

static int
icmp_match(const struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   const void *matchinfo,
	   int offset,
	   int *hotdrop)
{
	struct icmphdr _icmph, *ic;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}

/* Called when user tries to insert an entry of this type. */
static int
icmp_checkentry(const char *tablename,
		const struct ipt_ip *ip,
		void *matchinfo,
		unsigned int matchsize,
		unsigned int hook_mask)
{
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must specify proto == ICMP, and no unknown invflags */
	return ip->proto == IPPROTO_ICMP
		&& !(ip->invflags & IPT_INV_PROTO)
		&& matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
		&& !(icmpinfo->invflags & ~IPT_ICMP_INV);
}

/* The built-in targets: standard (NULL) and error. */
static struct ipt_target ipt_standard_target = {
	.name		= IPT_STANDARD_TARGET,
};

static struct ipt_target ipt_error_target = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
};

static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
};

static struct ipt_match tcp_matchstruct = {
	.name		= "tcp",
	.match		= &tcp_match,
	.checkentry	= &tcp_checkentry,
};

static struct ipt_match udp_matchstruct = {
	.name		= "udp",
	.match		= &udp_match,
	.checkentry	= &udp_checkentry,
};

static struct ipt_match icmp_matchstruct = {
	.name		= "icmp",
	.match		= &icmp_match,
	.checkentry	= &icmp_checkentry,
};

#ifdef CONFIG_PROC_FS
static inline int print_name(const char *i,
			     off_t start_offset, char *buffer, int length,
			     off_t *pos, unsigned int *count)
{
	if ((*count)++ >= start_offset) {
		unsigned int namelen;

		namelen = sprintf(buffer + *pos, "%s\n",
				  i + sizeof(struct list_head));
		if (*pos + namelen > length) {
			/* Stop iterating */
			return 1;
		}
		*pos += namelen;
	}
	return 0;
}

static inline int print_target(const struct ipt_target *t,
			       off_t start_offset, char *buffer, int length,
			       off_t *pos, unsigned int *count)
{
	if (t == &ipt_standard_target || t == &ipt_error_target)
		return 0;
	return print_name((char *)t, start_offset, buffer, length, pos, count);
}

static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_tables, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}

static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	*start = (char *)((unsigned long)count - offset);
	return pos;
}

static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_match, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	*start = (char *)((unsigned long)count - offset);
	return pos;
}

static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
{ { "ip_tables_names", ipt_get_tables },
  { "ip_tables_targets", ipt_get_targets },
  { "ip_tables_matches", ipt_get_matches },
  { NULL, NULL} };
#endif /*CONFIG_PROC_FS*/

static int __init init(void)
{
	int ret;

	/* No one else will be downing the sem now, so we won't sleep */
	down(&ipt_mutex);
	list_append(&ipt_target, &ipt_standard_target);
	list_append(&ipt_target, &ipt_error_target);
	list_append(&ipt_match, &tcp_matchstruct);
	list_append(&ipt_match, &udp_matchstruct);
	list_append(&ipt_match, &icmp_matchstruct);
	up(&ipt_mutex);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

#ifdef CONFIG_PROC_FS
	{
		struct proc_dir_entry *proc;
		int i;

		for (i = 0; ipt_proc_entry[i].name; i++) {
			proc = proc_net_create(ipt_proc_entry[i].name, 0,
					       ipt_proc_entry[i].get_info);
			if (!proc) {
				while (--i >= 0)
					proc_net_remove(ipt_proc_entry[i].name);
				nf_unregister_sockopt(&ipt_sockopts);
				return -ENOMEM;
			}
			proc->owner = THIS_MODULE;
		}
	}
#endif

	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
	return 0;
}

static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);
#ifdef CONFIG_PROC_FS
	{
		int i;
		for (i = 0; ipt_proc_entry[i].name; i++)
			proc_net_remove(ipt_proc_entry[i].name);
	}
#endif
}

EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

module_init(init);
module_exit(fini);