/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000:   Added NF_REPEAT --RR.
 * 08-May-2003:   Internal logging interface added by Jozsef Kadlecsik.
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <net/sock.h>

/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif

/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
static DEFINE_SPINLOCK(nf_hook_lock);

/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long-term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
        nf_queue_outfn_t outfn;
        void *data;
} queue_handler[NPROTO];

static struct nf_queue_rerouter *queue_rerouter;

static DEFINE_RWLOCK(queue_handler_lock);

int nf_register_hook(struct nf_hook_ops *reg)
{
        struct list_head *i;

        spin_lock_bh(&nf_hook_lock);
        list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
                if (reg->priority < ((struct nf_hook_ops *)i)->priority)
                        break;
        }
        list_add_rcu(&reg->list, i->prev);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
        return 0;
}
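
/*
 * Illustrative sketch only (not code from this file): a module typically
 * fills in a struct nf_hook_ops and registers it here.  The hook function,
 * hooknum and priority below are examples.
 *
 *      static struct nf_hook_ops my_ops = {
 *              .hook     = my_hook_fn,
 *              .owner    = THIS_MODULE,
 *              .pf       = PF_INET,
 *              .hooknum  = NF_IP_PRE_ROUTING,
 *              .priority = NF_IP_PRI_FILTER,
 *      };
 *
 *      nf_register_hook(&my_ops);
 *
 * Hooks are kept sorted by ascending priority, so lower priority values
 * run earlier on a given (pf, hooknum) chain.
 */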

void nf_unregister_hook(struct nf_hook_ops *reg)
{
        spin_lock_bh(&nf_hook_lock);
        list_del_rcu(&reg->list);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
}

/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
        return max1 > min2 && min1 < max2;
}
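
/*
 * Worked example: the half-open ranges [64, 67) and [66, 70) overlap
 * (67 > 66 && 64 < 70), while [64, 66) and [66, 70) merely touch and do
 * not (66 > 66 is false).
 */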

/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
        struct list_head *i;
        int ret = 0;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == reg->pf
                    && (overlap(ops->set_optmin, ops->set_optmax,
                                reg->set_optmin, reg->set_optmax)
                        || overlap(ops->get_optmin, ops->get_optmax,
                                   reg->get_optmin, reg->get_optmax))) {
                        NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
                                ops->set_optmin, ops->set_optmax,
                                ops->get_optmin, ops->get_optmax,
                                reg->set_optmin, reg->set_optmax,
                                reg->get_optmin, reg->get_optmax);
                        ret = -EBUSY;
                        goto out;
                }
        }

        list_add(&reg->list, &nf_sockopts);
out:
        up(&nf_sockopt_mutex);
        return ret;
}
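
/*
 * Illustrative sketch only: a protocol module registering its sockopt
 * range might look like the following (the MY_* constants and do_my_*
 * handlers are placeholders, not names from this file):
 *
 *      static struct nf_sockopt_ops my_sockopts = {
 *              .pf         = PF_INET,
 *              .set_optmin = MY_BASE_CTL,
 *              .set_optmax = MY_SET_MAX + 1,
 *              .set        = do_my_set_ctl,
 *              .get_optmin = MY_BASE_CTL,
 *              .get_optmax = MY_GET_MAX + 1,
 *              .get        = do_my_get_ctl,
 *      };
 *
 *      nf_register_sockopt(&my_sockopts);
 *
 * Registration fails with -EBUSY if the [optmin, optmax) ranges collide
 * with an already-registered set for the same protocol family.
 */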

void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
        /* No point being interruptible: we're probably in cleanup_module() */
 restart:
        down(&nf_sockopt_mutex);
        if (reg->use != 0) {
                /* To be woken by nf_sockopt call... */
                /* FIXME: Stuart Young's name appears gratuitously. */
                set_current_state(TASK_UNINTERRUPTIBLE);
                reg->cleanup_task = current;
                up(&nf_sockopt_mutex);
                schedule();
                goto restart;
        }
        list_del(&reg->list);
        up(&nf_sockopt_mutex);
}

/* Call get/setsockopt() */
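/*
 * The matching ops->get()/ops->set() may sleep, so nf_sockopt_mutex is
 * dropped around the call; ops->use keeps the entry pinned meanwhile and
 * nf_unregister_sockopt() sleeps until it falls back to zero.
 */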
static int nf_sockopt(struct sock *sk, int pf, int val,
                      char __user *opt, int *len, int get)
{
        struct list_head *i;
        struct nf_sockopt_ops *ops;
        int ret;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == pf) {
                        if (get) {
                                if (val >= ops->get_optmin
                                    && val < ops->get_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->get(sk, val, opt, len);
                                        goto out;
                                }
                        } else {
                                if (val >= ops->set_optmin
                                    && val < ops->set_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->set(sk, val, opt, *len);
                                        goto out;
                                }
                        }
                }
        }
        up(&nf_sockopt_mutex);
        return -ENOPROTOOPT;

 out:
        down(&nf_sockopt_mutex);
        ops->use--;
        if (ops->cleanup_task)
                wake_up_process(ops->cleanup_task);
        up(&nf_sockopt_mutex);
        return ret;
}

int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
                  int len)
{
        return nf_sockopt(sk, pf, val, opt, &len, 0);
}

int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
{
        return nf_sockopt(sk, pf, val, opt, len, 1);
}

static unsigned int nf_iterate(struct list_head *head,
                               struct sk_buff **skb,
                               int hook,
                               const struct net_device *indev,
                               const struct net_device *outdev,
                               struct list_head **i,
                               int (*okfn)(struct sk_buff *),
                               int hook_thresh)
{
        unsigned int verdict;

        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
        list_for_each_continue_rcu(*i, head) {
                struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

                if (hook_thresh > elem->priority)
                        continue;

                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
                verdict = elem->hook(hook, skb, indev, outdev, okfn);
                if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely(verdict > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
                                        elem->hook, hook);
                                continue;
                        }
#endif
                        if (verdict != NF_REPEAT)
                                return verdict;
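                        /* NF_REPEAT: step the cursor back one node so the
                         * list_for_each_continue_rcu() above revisits this
                         * same hook on the next pass. */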
                        *i = (*i)->prev;
                }
        }
        return NF_ACCEPT;
}

int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
        int ret;

        write_lock_bh(&queue_handler_lock);
        if (queue_handler[pf].outfn)
                ret = -EBUSY;
        else {
                queue_handler[pf].outfn = outfn;
                queue_handler[pf].data = data;
                ret = 0;
        }
        write_unlock_bh(&queue_handler_lock);

        return ret;
}
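
/*
 * Illustrative sketch only: a queue handler registers an outfn that takes
 * ownership of the skb and later hands it back through nf_reinject().
 * The handler name below is an example, not code from this file.
 *
 *      static int my_outfn(struct sk_buff *skb, struct nf_info *info,
 *                          void *data)
 *      {
 *              ... queue skb to userspace; reinject it via nf_reinject() ...
 *              return 0;
 *      }
 *
 *      nf_register_queue_handler(PF_INET, my_outfn, NULL);
 */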

/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
        write_lock_bh(&queue_handler_lock);
        queue_handler[pf].outfn = NULL;
        queue_handler[pf].data = NULL;
        write_unlock_bh(&queue_handler_lock);

        return 0;
}

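/*
 * Per-protocol rerouters: rer_size extra bytes are reserved behind each
 * struct nf_info (see the kmalloc() in nf_queue()) so that save() can
 * stash routing state before the packet is queued and reroute() can act
 * on it once the queue handler has accepted the packet.
 */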
int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer)
{
        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf]));
        write_unlock_bh(&queue_handler_lock);

        return 0;
}

int nf_unregister_queue_rerouter(int pf)
{
        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf]));
        write_unlock_bh(&queue_handler_lock);
        return 0;
}

/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
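/*
 * Returns 1 when the packet has been taken over (queued to userspace, or
 * freed on error) and 0 when the hook is on its way out, in which case
 * the caller simply carries on with the next hook.
 */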
static int nf_queue(struct sk_buff **skb,
                    struct list_head *elem,
                    int pf, unsigned int hook,
                    struct net_device *indev,
                    struct net_device *outdev,
                    int (*okfn)(struct sk_buff *))
{
        int status;
        struct nf_info *info;
#ifdef CONFIG_BRIDGE_NETFILTER
        struct net_device *physindev = NULL;
        struct net_device *physoutdev = NULL;
#endif

        /* QUEUE == DROP if no one is waiting, to be safe. */
        read_lock(&queue_handler_lock);
        if (!queue_handler[pf].outfn) {
                read_unlock(&queue_handler_lock);
                kfree_skb(*skb);
                return 1;
        }

        info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC);
        if (!info) {
                if (net_ratelimit())
                        printk(KERN_ERR "OOM queueing packet %p\n",
                               *skb);
                read_unlock(&queue_handler_lock);
                kfree_skb(*skb);
                return 1;
        }

        *info = (struct nf_info) {
                (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

        /* If it's going away, ignore hook. */
        if (!try_module_get(info->elem->owner)) {
                read_unlock(&queue_handler_lock);
                kfree(info);
                return 0;
        }

        /* Bump dev refs so they don't vanish while packet is out */
        if (indev) dev_hold(indev);
        if (outdev) dev_hold(outdev);

#ifdef CONFIG_BRIDGE_NETFILTER
        if ((*skb)->nf_bridge) {
                physindev = (*skb)->nf_bridge->physindev;
                if (physindev) dev_hold(physindev);
                physoutdev = (*skb)->nf_bridge->physoutdev;
                if (physoutdev) dev_hold(physoutdev);
        }
#endif
        if (queue_rerouter[pf].save)
                queue_rerouter[pf].save(*skb, info);

        status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data);

        if (status >= 0 && queue_rerouter[pf].reroute)
                status = queue_rerouter[pf].reroute(skb, info);

        read_unlock(&queue_handler_lock);

        if (status < 0) {
                /* James M doesn't say fuck enough. */
                if (indev) dev_put(indev);
                if (outdev) dev_put(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
                if (physindev) dev_put(physindev);
                if (physoutdev) dev_put(physoutdev);
#endif
                module_put(info->elem->owner);
                kfree(info);
                kfree_skb(*skb);

                return 1;
        }

        return 1;
}

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *),
                 int hook_thresh)
{
        struct list_head *elem;
        unsigned int verdict;
        int ret = 0;

        /* We may already have this, but read-locks nest anyway */
        rcu_read_lock();

        elem = &nf_hooks[pf][hook];
next_hook:
        verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
                             outdev, &elem, okfn, hook_thresh);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
                goto unlock;
        } else if (verdict == NF_DROP) {
                kfree_skb(*pskb);
                ret = -EPERM;
        } else if (verdict == NF_QUEUE) {
                NFDEBUG("nf_hook: Verdict = QUEUE.\n");
                if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn))
                        goto next_hook;
        }
unlock:
        rcu_read_unlock();
        return ret;
}
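
/*
 * Callers normally reach nf_hook_slow() through the NF_HOOK() wrapper in
 * <linux/netfilter.h>.  An illustrative IPv4 output-path invocation (the
 * hook point and okfn are examples) looks like:
 *
 *      NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, ip_finish_output);
 *
 * A return of 1 tells the wrapper to run okfn() itself; -EPERM means the
 * packet was dropped by a hook.
 */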

void nf_reinject(struct sk_buff *skb, struct nf_info *info,
                 unsigned int verdict)
{
        struct list_head *elem = &info->elem->list;
        struct list_head *i;

        rcu_read_lock();

        /* Release those devices we held, or Alexey will kill me. */
        if (info->indev) dev_put(info->indev);
        if (info->outdev) dev_put(info->outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge) {
                if (skb->nf_bridge->physindev)
                        dev_put(skb->nf_bridge->physindev);
                if (skb->nf_bridge->physoutdev)
                        dev_put(skb->nf_bridge->physoutdev);
        }
#endif

        /* Drop reference to owner of hook which queued us. */
        module_put(info->elem->owner);

        list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
                if (i == elem)
                        break;
        }

        if (elem == &nf_hooks[info->pf][info->hook]) {
                /* The module which sent it to userspace is gone. */
                NFDEBUG("%s: module disappeared, dropping packet.\n",
                        __FUNCTION__);
                verdict = NF_DROP;
        }

        /* Continue traversal iff userspace said ok... */
        if (verdict == NF_REPEAT) {
                elem = elem->prev;
                verdict = NF_ACCEPT;
        }

        if (verdict == NF_ACCEPT) {
        next_hook:
                verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
                                     &skb, info->hook,
                                     info->indev, info->outdev, &elem,
                                     info->okfn, INT_MIN);
        }

        switch (verdict) {
        case NF_ACCEPT:
                info->okfn(skb);
                break;

        case NF_QUEUE:
                if (!nf_queue(&skb, elem, info->pf, info->hook,
                              info->indev, info->outdev, info->okfn))
                        goto next_hook;
                break;
        }
        rcu_read_unlock();

        if (verdict == NF_DROP)
                kfree_skb(skb);

        kfree(info);
        return;
}

int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
        struct sk_buff *nskb;

        if (writable_len > (*pskb)->len)
                return 0;

        /* Not exclusive use of packet?  Must copy. */
        if (skb_shared(*pskb) || skb_cloned(*pskb))
                goto copy_skb;

        return pskb_may_pull(*pskb, writable_len);

copy_skb:
        nskb = skb_copy(*pskb, GFP_ATOMIC);
        if (!nskb)
                return 0;
        BUG_ON(skb_is_nonlinear(nskb));

        /* Rest of kernel will get very unhappy if we pass it a
           suddenly-orphaned skbuff */
        if ((*pskb)->sk)
                skb_set_owner_w(nskb, (*pskb)->sk);
        kfree_skb(*pskb);
        *pskb = nskb;
        return 1;
}
EXPORT_SYMBOL(skb_make_writable);
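
/*
 * Typical use in a target that modifies packet data (illustrative sketch,
 * not code from this file):
 *
 *      if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 *              return NF_DROP;
 *
 * On success the skb behind *pskb is private and linear over writable_len,
 * so it is safe to modify in place.
 */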

/* Internal logging interface, which relies on the real
   LOG target modules */

#define NF_LOG_PREFIXLEN                128

static nf_logfn *nf_logging[NPROTO]; /* = NULL */
static int reported = 0;
static DEFINE_SPINLOCK(nf_log_lock);

int nf_log_register(int pf, nf_logfn *logfn)
{
        int ret = -EBUSY;

        /* Any setup of logging members must be done before
         * substituting pointer. */
        spin_lock(&nf_log_lock);
        if (!nf_logging[pf]) {
                rcu_assign_pointer(nf_logging[pf], logfn);
                ret = 0;
        }
        spin_unlock(&nf_log_lock);
        return ret;
}

void nf_log_unregister(int pf, nf_logfn *logfn)
{
        spin_lock(&nf_log_lock);
        if (nf_logging[pf] == logfn)
                nf_logging[pf] = NULL;
        spin_unlock(&nf_log_lock);

        /* Give time to concurrent readers. */
        synchronize_net();
}

void nf_log_packet(int pf,
                   unsigned int hooknum,
                   const struct sk_buff *skb,
                   const struct net_device *in,
                   const struct net_device *out,
                   const char *fmt, ...)
{
        va_list args;
        char prefix[NF_LOG_PREFIXLEN];
        nf_logfn *logfn;

        rcu_read_lock();
        logfn = rcu_dereference(nf_logging[pf]);
        if (logfn) {
                va_start(args, fmt);
                vsnprintf(prefix, sizeof(prefix), fmt, args);
                va_end(args);
                /* We must read logging before nf_logfn[pf] */
                logfn(hooknum, skb, in, out, prefix);
        } else if (!reported) {
                printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
                       "no backend logging module loaded in!\n");
                reported++;
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL(nf_log_register);
EXPORT_SYMBOL(nf_log_unregister);
EXPORT_SYMBOL(nf_log_packet);
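
/*
 * A hook that wants a packet logged through whatever backend is loaded can
 * simply do (the prefix string is an example):
 *
 *      nf_log_packet(PF_INET, hooknum, skb, in, out, "my-module: ");
 *
 * Nothing is logged (beyond a one-time warning) until a backend LOG target
 * module has called nf_log_register() for that protocol family.
 */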

/* This does not belong here, but locally generated errors need it if
   connection tracking is in use: without this, the connection may not be
   in the hash table, and hence manufactured ICMP or RST packets will not
   be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);

void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, struct sk_buff *);

        if (skb->nfct && (attach = ip_ct_attach) != NULL) {
                mb(); /* Just to be sure: must be read before executing this */
                attach(new, skb);
        }
}

void __init netfilter_init(void)
{
        int i, h;

        queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter),
                                 GFP_KERNEL);
        if (!queue_rerouter)
                panic("netfilter: cannot allocate queue rerouter array\n");
        memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter));

        for (i = 0; i < NPROTO; i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
                        INIT_LIST_HEAD(&nf_hooks[i][h]);
        }
}

EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_reinject);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL_GPL(nf_register_queue_rerouter);
EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter);
EXPORT_SYMBOL(nf_unregister_sockopt);