/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000:   Added NF_REPEAT --RR.
 * 08-May-2003:   Internal logging interface added by Jozsef Kadlecsik.
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <net/sock.h>

/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif

/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
static DEFINE_SPINLOCK(nf_hook_lock);

/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long-term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
        nf_queue_outfn_t outfn;
        void *data;
} queue_handler[NPROTO];

static struct nf_queue_rerouter *queue_rerouter;

static DEFINE_RWLOCK(queue_handler_lock);

int nf_register_hook(struct nf_hook_ops *reg)
{
        struct list_head *i;

        spin_lock_bh(&nf_hook_lock);
        list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
                if (reg->priority < ((struct nf_hook_ops *)i)->priority)
                        break;
        }
        list_add_rcu(&reg->list, i->prev);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
        return 0;
}
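
/*
 * Illustrative sketch only (not code from this file): a module typically
 * fills in a struct nf_hook_ops and registers it here.  The hook function,
 * hooknum and priority below are examples.
 *
 *      static struct nf_hook_ops my_ops = {
 *              .hook     = my_hook_fn,
 *              .owner    = THIS_MODULE,
 *              .pf       = PF_INET,
 *              .hooknum  = NF_IP_PRE_ROUTING,
 *              .priority = NF_IP_PRI_FILTER,
 *      };
 *
 *      nf_register_hook(&my_ops);
 *
 * Hooks are kept sorted by ascending priority, so lower priority values
 * run earlier on a given (pf, hooknum) chain.
 */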

void nf_unregister_hook(struct nf_hook_ops *reg)
{
        spin_lock_bh(&nf_hook_lock);
        list_del_rcu(&reg->list);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
}

/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
        return max1 > min2 && min1 < max2;
}
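
/*
 * Worked example: the half-open ranges [64, 67) and [66, 70) overlap
 * (67 > 66 && 64 < 70), while [64, 66) and [66, 70) merely touch and do
 * not (66 > 66 is false).
 */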

/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
        struct list_head *i;
        int ret = 0;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == reg->pf
                    && (overlap(ops->set_optmin, ops->set_optmax,
                                reg->set_optmin, reg->set_optmax)
                        || overlap(ops->get_optmin, ops->get_optmax,
                                   reg->get_optmin, reg->get_optmax))) {
                        NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
                                ops->set_optmin, ops->set_optmax,
                                ops->get_optmin, ops->get_optmax,
                                reg->set_optmin, reg->set_optmax,
                                reg->get_optmin, reg->get_optmax);
                        ret = -EBUSY;
                        goto out;
                }
        }

        list_add(&reg->list, &nf_sockopts);
out:
        up(&nf_sockopt_mutex);
        return ret;
}
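
/*
 * Illustrative sketch only: a protocol module registering its sockopt
 * range might look like the following (the MY_* constants and do_my_*
 * handlers are placeholders, not names from this file):
 *
 *      static struct nf_sockopt_ops my_sockopts = {
 *              .pf         = PF_INET,
 *              .set_optmin = MY_BASE_CTL,
 *              .set_optmax = MY_SET_MAX + 1,
 *              .set        = do_my_set_ctl,
 *              .get_optmin = MY_BASE_CTL,
 *              .get_optmax = MY_GET_MAX + 1,
 *              .get        = do_my_get_ctl,
 *      };
 *
 *      nf_register_sockopt(&my_sockopts);
 *
 * Registration fails with -EBUSY if the [optmin, optmax) ranges collide
 * with an already-registered set for the same protocol family.
 */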

void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
        /* No point being interruptible: we're probably in cleanup_module() */
 restart:
        down(&nf_sockopt_mutex);
        if (reg->use != 0) {
                /* To be woken by nf_sockopt call... */
                /* FIXME: Stuart Young's name appears gratuitously. */
                set_current_state(TASK_UNINTERRUPTIBLE);
                reg->cleanup_task = current;
                up(&nf_sockopt_mutex);
                schedule();
                goto restart;
        }
        list_del(&reg->list);
        up(&nf_sockopt_mutex);
}

/* Call get/setsockopt() */
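/*
 * The matching ops->get()/ops->set() may sleep, so nf_sockopt_mutex is
 * dropped around the call; ops->use keeps the entry pinned meanwhile and
 * nf_unregister_sockopt() sleeps until it falls back to zero.
 */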
static int nf_sockopt(struct sock *sk, int pf, int val,
                      char __user *opt, int *len, int get)
{
        struct list_head *i;
        struct nf_sockopt_ops *ops;
        int ret;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == pf) {
                        if (get) {
                                if (val >= ops->get_optmin
                                    && val < ops->get_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->get(sk, val, opt, len);
                                        goto out;
                                }
                        } else {
                                if (val >= ops->set_optmin
                                    && val < ops->set_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->set(sk, val, opt, *len);
                                        goto out;
                                }
                        }
                }
        }
        up(&nf_sockopt_mutex);
        return -ENOPROTOOPT;

 out:
        down(&nf_sockopt_mutex);
        ops->use--;
        if (ops->cleanup_task)
                wake_up_process(ops->cleanup_task);
        up(&nf_sockopt_mutex);
        return ret;
}

int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
                  int len)
{
        return nf_sockopt(sk, pf, val, opt, &len, 0);
}

int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
{
        return nf_sockopt(sk, pf, val, opt, len, 1);
}

static unsigned int nf_iterate(struct list_head *head,
                               struct sk_buff **skb,
                               int hook,
                               const struct net_device *indev,
                               const struct net_device *outdev,
                               struct list_head **i,
                               int (*okfn)(struct sk_buff *),
                               int hook_thresh)
{
        unsigned int verdict;

        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
        list_for_each_continue_rcu(*i, head) {
                struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

                if (hook_thresh > elem->priority)
                        continue;

                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
                verdict = elem->hook(hook, skb, indev, outdev, okfn);
                if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely(verdict > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
                                        elem->hook, hook);
                                continue;
                        }
#endif
                        if (verdict != NF_REPEAT)
                                return verdict;
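                        /* NF_REPEAT: step the cursor back one node so the
                         * list_for_each_continue_rcu() above revisits this
                         * same hook on the next pass. */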
                        *i = (*i)->prev;
                }
        }
        return NF_ACCEPT;
}

int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
        int ret;

        write_lock_bh(&queue_handler_lock);
        if (queue_handler[pf].outfn)
                ret = -EBUSY;
        else {
                queue_handler[pf].outfn = outfn;
                queue_handler[pf].data = data;
                ret = 0;
        }
        write_unlock_bh(&queue_handler_lock);

        return ret;
}
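
/*
 * Illustrative sketch only: a queue handler registers an outfn that takes
 * ownership of the skb and later hands it back through nf_reinject().
 * The handler name below is an example, not code from this file.
 *
 *      static int my_outfn(struct sk_buff *skb, struct nf_info *info,
 *                          void *data)
 *      {
 *              ... queue skb to userspace; reinject it via nf_reinject() ...
 *              return 0;
 *      }
 *
 *      nf_register_queue_handler(PF_INET, my_outfn, NULL);
 */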

/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
        write_lock_bh(&queue_handler_lock);
        queue_handler[pf].outfn = NULL;
        queue_handler[pf].data = NULL;
        write_unlock_bh(&queue_handler_lock);

        return 0;
}

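/*
 * Per-protocol rerouters: rer_size extra bytes are reserved behind each
 * struct nf_info (see the kmalloc() in nf_queue()) so that save() can
 * stash routing state before the packet is queued and reroute() can act
 * on it once the queue handler has accepted the packet.
 */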
int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer)
{
        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf]));
        write_unlock_bh(&queue_handler_lock);

        return 0;
}

int nf_unregister_queue_rerouter(int pf)
{
        if (pf >= NPROTO)
                return -EINVAL;

        write_lock_bh(&queue_handler_lock);
        memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf]));
        write_unlock_bh(&queue_handler_lock);
        return 0;
}

/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
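/*
 * Returns 1 when the packet has been taken over (queued to userspace, or
 * freed on error) and 0 when the hook is on its way out, in which case
 * the caller simply carries on with the next hook.
 */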
static int nf_queue(struct sk_buff **skb,
                    struct list_head *elem,
                    int pf, unsigned int hook,
                    struct net_device *indev,
                    struct net_device *outdev,
                    int (*okfn)(struct sk_buff *))
{
        int status;
        struct nf_info *info;
#ifdef CONFIG_BRIDGE_NETFILTER
        struct net_device *physindev = NULL;
        struct net_device *physoutdev = NULL;
#endif

        /* QUEUE == DROP if no one is waiting, to be safe. */
        read_lock(&queue_handler_lock);
        if (!queue_handler[pf].outfn) {
                read_unlock(&queue_handler_lock);
                kfree_skb(*skb);
                return 1;
        }

        info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC);
        if (!info) {
                if (net_ratelimit())
                        printk(KERN_ERR "OOM queueing packet %p\n",
                               *skb);
                read_unlock(&queue_handler_lock);
                kfree_skb(*skb);
                return 1;
        }

        *info = (struct nf_info) {
                (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

        /* If it's going away, ignore hook. */
        if (!try_module_get(info->elem->owner)) {
                read_unlock(&queue_handler_lock);
                kfree(info);
                return 0;
        }

        /* Bump dev refs so they don't vanish while packet is out */
        if (indev) dev_hold(indev);
        if (outdev) dev_hold(outdev);

#ifdef CONFIG_BRIDGE_NETFILTER
        if ((*skb)->nf_bridge) {
                physindev = (*skb)->nf_bridge->physindev;
                if (physindev) dev_hold(physindev);
                physoutdev = (*skb)->nf_bridge->physoutdev;
                if (physoutdev) dev_hold(physoutdev);
        }
#endif
        if (queue_rerouter[pf].save)
                queue_rerouter[pf].save(*skb, info);

        status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data);

        if (status >= 0 && queue_rerouter[pf].reroute)
                status = queue_rerouter[pf].reroute(skb, info);

        read_unlock(&queue_handler_lock);

        if (status < 0) {
                /* James M doesn't say fuck enough. */
                if (indev) dev_put(indev);
                if (outdev) dev_put(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
                if (physindev) dev_put(physindev);
                if (physoutdev) dev_put(physoutdev);
#endif
                module_put(info->elem->owner);
                kfree(info);
                kfree_skb(*skb);

                return 1;
        }

        return 1;
}

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *),
                 int hook_thresh)
{
        struct list_head *elem;
        unsigned int verdict;
        int ret = 0;

        /* We may already have this, but read-locks nest anyway */
        rcu_read_lock();

        elem = &nf_hooks[pf][hook];
next_hook:
        verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
                             outdev, &elem, okfn, hook_thresh);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
                goto unlock;
        } else if (verdict == NF_DROP) {
                kfree_skb(*pskb);
                ret = -EPERM;
        } else if (verdict == NF_QUEUE) {
                NFDEBUG("nf_hook: Verdict = QUEUE.\n");
                if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn))
                        goto next_hook;
        }
unlock:
        rcu_read_unlock();
        return ret;
}
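
/*
 * Callers normally reach nf_hook_slow() through the NF_HOOK() wrapper in
 * <linux/netfilter.h>.  An illustrative IPv4 output-path invocation (the
 * hook point and okfn are examples) looks like:
 *
 *      NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, ip_finish_output);
 *
 * A return of 1 tells the wrapper to run okfn() itself; -EPERM means the
 * packet was dropped by a hook.
 */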

void nf_reinject(struct sk_buff *skb, struct nf_info *info,
                 unsigned int verdict)
{
        struct list_head *elem = &info->elem->list;
        struct list_head *i;

        rcu_read_lock();

        /* Release those devices we held, or Alexey will kill me. */
        if (info->indev) dev_put(info->indev);
        if (info->outdev) dev_put(info->outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge) {
                if (skb->nf_bridge->physindev)
                        dev_put(skb->nf_bridge->physindev);
                if (skb->nf_bridge->physoutdev)
                        dev_put(skb->nf_bridge->physoutdev);
        }
#endif

        /* Drop reference to owner of hook which queued us. */
        module_put(info->elem->owner);

        list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
                if (i == elem)
                        break;
        }

        if (elem == &nf_hooks[info->pf][info->hook]) {
                /* The module which sent it to userspace is gone. */
                NFDEBUG("%s: module disappeared, dropping packet.\n",
                        __FUNCTION__);
                verdict = NF_DROP;
        }

        /* Continue traversal iff userspace said ok... */
        if (verdict == NF_REPEAT) {
                elem = elem->prev;
                verdict = NF_ACCEPT;
        }

        if (verdict == NF_ACCEPT) {
        next_hook:
                verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
                                     &skb, info->hook,
                                     info->indev, info->outdev, &elem,
                                     info->okfn, INT_MIN);
        }

        switch (verdict) {
        case NF_ACCEPT:
                info->okfn(skb);
                break;

        case NF_QUEUE:
                if (!nf_queue(&skb, elem, info->pf, info->hook,
                              info->indev, info->outdev, info->okfn))
                        goto next_hook;
                break;
        }
        rcu_read_unlock();

        if (verdict == NF_DROP)
                kfree_skb(skb);

        kfree(info);
        return;
}

int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
        struct sk_buff *nskb;

        if (writable_len > (*pskb)->len)
                return 0;

        /* Not exclusive use of packet?  Must copy. */
        if (skb_shared(*pskb) || skb_cloned(*pskb))
                goto copy_skb;

        return pskb_may_pull(*pskb, writable_len);

copy_skb:
        nskb = skb_copy(*pskb, GFP_ATOMIC);
        if (!nskb)
                return 0;
        BUG_ON(skb_is_nonlinear(nskb));

        /* Rest of kernel will get very unhappy if we pass it a
           suddenly-orphaned skbuff */
        if ((*pskb)->sk)
                skb_set_owner_w(nskb, (*pskb)->sk);
        kfree_skb(*pskb);
        *pskb = nskb;
        return 1;
}
EXPORT_SYMBOL(skb_make_writable);
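
/*
 * Typical use in a target that modifies packet data (illustrative sketch,
 * not code from this file):
 *
 *      if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 *              return NF_DROP;
 *
 * On success the skb behind *pskb is private and linear over writable_len,
 * so it is safe to modify in place.
 */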

/* Internal logging interface, which relies on the real
   LOG target modules */

#define NF_LOG_PREFIXLEN                128

static nf_logfn *nf_logging[NPROTO]; /* = NULL */
static int reported = 0;
static DEFINE_SPINLOCK(nf_log_lock);

int nf_log_register(int pf, nf_logfn *logfn)
{
        int ret = -EBUSY;

        /* Any setup of logging members must be done before
         * substituting pointer. */
        spin_lock(&nf_log_lock);
        if (!nf_logging[pf]) {
                rcu_assign_pointer(nf_logging[pf], logfn);
                ret = 0;
        }
        spin_unlock(&nf_log_lock);
        return ret;
}

void nf_log_unregister(int pf, nf_logfn *logfn)
{
        spin_lock(&nf_log_lock);
        if (nf_logging[pf] == logfn)
                nf_logging[pf] = NULL;
        spin_unlock(&nf_log_lock);

        /* Give time to concurrent readers. */
        synchronize_net();
}

void nf_log_packet(int pf,
                   unsigned int hooknum,
                   const struct sk_buff *skb,
                   const struct net_device *in,
                   const struct net_device *out,
                   const char *fmt, ...)
{
        va_list args;
        char prefix[NF_LOG_PREFIXLEN];
        nf_logfn *logfn;

        rcu_read_lock();
        logfn = rcu_dereference(nf_logging[pf]);
        if (logfn) {
                va_start(args, fmt);
                vsnprintf(prefix, sizeof(prefix), fmt, args);
                va_end(args);
                /* We must read logging before nf_logfn[pf] */
                logfn(hooknum, skb, in, out, prefix);
        } else if (!reported) {
                printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
                       "no backend logging module loaded in!\n");
                reported++;
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL(nf_log_register);
EXPORT_SYMBOL(nf_log_unregister);
EXPORT_SYMBOL(nf_log_packet);
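
/*
 * A hook that wants a packet logged through whatever backend is loaded can
 * simply do (the prefix string is an example):
 *
 *      nf_log_packet(PF_INET, hooknum, skb, in, out, "my-module: ");
 *
 * Nothing is logged (beyond a one-time warning) until a backend LOG target
 * module has called nf_log_register() for that protocol family.
 */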

/* This does not belong here, but locally generated errors need it if
   connection tracking is in use: without this, the connection may not be
   in the hash table, and hence manufactured ICMP or RST packets will not
   be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);

void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, struct sk_buff *);

        if (skb->nfct && (attach = ip_ct_attach) != NULL) {
                mb(); /* Just to be sure: must be read before executing this */
                attach(new, skb);
        }
}

void __init netfilter_init(void)
{
        int i, h;

        queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter),
                                 GFP_KERNEL);
        if (!queue_rerouter)
                panic("netfilter: cannot allocate queue rerouter array\n");
        memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter));

        for (i = 0; i < NPROTO; i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
                        INIT_LIST_HEAD(&nf_hooks[i][h]);
        }
}

EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_reinject);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL_GPL(nf_register_queue_rerouter);
EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter);
EXPORT_SYMBOL(nf_unregister_sockopt);