]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/core/filter.c
iwlwifi: don't include iwl-dev.h from iwl-devtrace.h
[net-next-2.6.git] / net / core / filter.c
CommitLineData
1da177e4
LT
1/*
2 * Linux Socket Filter - Kernel level socket filtering
3 *
4 * Author:
5 * Jay Schulist <jschlst@samba.org>
6 *
7 * Based on the design of:
8 * - The Berkeley Packet Filter
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Andi Kleen - Fix a few bad bugs and races.
93699863 16 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
1da177e4
LT
17 */
18
19#include <linux/module.h>
20#include <linux/types.h>
1da177e4
LT
21#include <linux/mm.h>
22#include <linux/fcntl.h>
23#include <linux/socket.h>
24#include <linux/in.h>
25#include <linux/inet.h>
26#include <linux/netdevice.h>
27#include <linux/if_packet.h>
28#include <net/ip.h>
29#include <net/protocol.h>
4738c1db 30#include <net/netlink.h>
1da177e4
LT
31#include <linux/skbuff.h>
32#include <net/sock.h>
33#include <linux/errno.h>
34#include <linux/timer.h>
35#include <asm/system.h>
36#include <asm/uaccess.h>
40daafc8 37#include <asm/unaligned.h>
1da177e4
LT
38#include <linux/filter.h>
39
40/* No hurry in this branch */
0b05b2a4 41static void *__load_pointer(struct sk_buff *skb, int k)
1da177e4
LT
42{
43 u8 *ptr = NULL;
44
45 if (k >= SKF_NET_OFF)
d56f90a7 46 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
1da177e4 47 else if (k >= SKF_LL_OFF)
98e399f8 48 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
1da177e4 49
27a884dc 50 if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
1da177e4
LT
51 return ptr;
52 return NULL;
53}
54
0b05b2a4 55static inline void *load_pointer(struct sk_buff *skb, int k,
4ec93edb 56 unsigned int size, void *buffer)
0b05b2a4
PM
57{
58 if (k >= 0)
59 return skb_header_pointer(skb, k, size, buffer);
60 else {
61 if (k >= SKF_AD_OFF)
62 return NULL;
63 return __load_pointer(skb, k);
64 }
65}
66
43db6d65
SH
67/**
68 * sk_filter - run a packet through a socket filter
69 * @sk: sock associated with &sk_buff
70 * @skb: buffer to filter
43db6d65
SH
71 *
72 * Run the filter code and then cut skb->data to correct size returned by
73 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
74 * than pkt_len we keep whole skb->data. This is the socket level
75 * wrapper to sk_run_filter. It returns 0 if the packet should
76 * be accepted or -EPERM if the packet should be tossed.
77 *
78 */
79int sk_filter(struct sock *sk, struct sk_buff *skb)
80{
81 int err;
82 struct sk_filter *filter;
83
84 err = security_sock_rcv_skb(sk, skb);
85 if (err)
86 return err;
87
88 rcu_read_lock_bh();
a898def2 89 filter = rcu_dereference_bh(sk->sk_filter);
43db6d65
SH
90 if (filter) {
91 unsigned int pkt_len = sk_run_filter(skb, filter->insns,
92 filter->len);
93 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
94 }
95 rcu_read_unlock_bh();
96
97 return err;
98}
99EXPORT_SYMBOL(sk_filter);
100
1da177e4 101/**
2966b66c 102 * sk_run_filter - run a filter on a socket
1da177e4
LT
103 * @skb: buffer to run the filter on
104 * @filter: filter to apply
105 * @flen: length of filter
106 *
107 * Decode and apply filter instructions to the skb->data.
108 * Return length to keep, 0 for none. skb is the data we are
109 * filtering, filter is the array of filter instructions, and
110 * len is the number of filter blocks in the array.
111 */
4bad4dc9 112unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
1da177e4 113{
1da177e4 114 struct sock_filter *fentry; /* We walk down these */
0b05b2a4 115 void *ptr;
2966b66c
KK
116 u32 A = 0; /* Accumulator */
117 u32 X = 0; /* Index Register */
1da177e4 118 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
0b05b2a4 119 u32 tmp;
1da177e4
LT
120 int k;
121 int pc;
122
123 /*
124 * Process array of filter instructions.
125 */
126 for (pc = 0; pc < flen; pc++) {
127 fentry = &filter[pc];
4ec93edb 128
1da177e4
LT
129 switch (fentry->code) {
130 case BPF_ALU|BPF_ADD|BPF_X:
131 A += X;
132 continue;
133 case BPF_ALU|BPF_ADD|BPF_K:
134 A += fentry->k;
135 continue;
136 case BPF_ALU|BPF_SUB|BPF_X:
137 A -= X;
138 continue;
139 case BPF_ALU|BPF_SUB|BPF_K:
140 A -= fentry->k;
141 continue;
142 case BPF_ALU|BPF_MUL|BPF_X:
143 A *= X;
144 continue;
145 case BPF_ALU|BPF_MUL|BPF_K:
146 A *= fentry->k;
147 continue;
148 case BPF_ALU|BPF_DIV|BPF_X:
149 if (X == 0)
150 return 0;
151 A /= X;
152 continue;
153 case BPF_ALU|BPF_DIV|BPF_K:
1da177e4
LT
154 A /= fentry->k;
155 continue;
156 case BPF_ALU|BPF_AND|BPF_X:
157 A &= X;
158 continue;
159 case BPF_ALU|BPF_AND|BPF_K:
160 A &= fentry->k;
161 continue;
162 case BPF_ALU|BPF_OR|BPF_X:
163 A |= X;
164 continue;
165 case BPF_ALU|BPF_OR|BPF_K:
166 A |= fentry->k;
167 continue;
168 case BPF_ALU|BPF_LSH|BPF_X:
169 A <<= X;
170 continue;
171 case BPF_ALU|BPF_LSH|BPF_K:
172 A <<= fentry->k;
173 continue;
174 case BPF_ALU|BPF_RSH|BPF_X:
175 A >>= X;
176 continue;
177 case BPF_ALU|BPF_RSH|BPF_K:
178 A >>= fentry->k;
179 continue;
180 case BPF_ALU|BPF_NEG:
181 A = -A;
182 continue;
183 case BPF_JMP|BPF_JA:
184 pc += fentry->k;
185 continue;
186 case BPF_JMP|BPF_JGT|BPF_K:
187 pc += (A > fentry->k) ? fentry->jt : fentry->jf;
188 continue;
189 case BPF_JMP|BPF_JGE|BPF_K:
190 pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
191 continue;
192 case BPF_JMP|BPF_JEQ|BPF_K:
193 pc += (A == fentry->k) ? fentry->jt : fentry->jf;
194 continue;
195 case BPF_JMP|BPF_JSET|BPF_K:
196 pc += (A & fentry->k) ? fentry->jt : fentry->jf;
197 continue;
198 case BPF_JMP|BPF_JGT|BPF_X:
199 pc += (A > X) ? fentry->jt : fentry->jf;
200 continue;
201 case BPF_JMP|BPF_JGE|BPF_X:
202 pc += (A >= X) ? fentry->jt : fentry->jf;
203 continue;
204 case BPF_JMP|BPF_JEQ|BPF_X:
205 pc += (A == X) ? fentry->jt : fentry->jf;
206 continue;
207 case BPF_JMP|BPF_JSET|BPF_X:
208 pc += (A & X) ? fentry->jt : fentry->jf;
209 continue;
210 case BPF_LD|BPF_W|BPF_ABS:
211 k = fentry->k;
e35bedf3 212load_w:
0b05b2a4
PM
213 ptr = load_pointer(skb, k, 4, &tmp);
214 if (ptr != NULL) {
d3e2ce3b 215 A = get_unaligned_be32(ptr);
0b05b2a4 216 continue;
1da177e4 217 }
1198ad00 218 break;
1da177e4
LT
219 case BPF_LD|BPF_H|BPF_ABS:
220 k = fentry->k;
e35bedf3 221load_h:
0b05b2a4
PM
222 ptr = load_pointer(skb, k, 2, &tmp);
223 if (ptr != NULL) {
d3e2ce3b 224 A = get_unaligned_be16(ptr);
0b05b2a4 225 continue;
1da177e4 226 }
1198ad00 227 break;
1da177e4
LT
228 case BPF_LD|BPF_B|BPF_ABS:
229 k = fentry->k;
230load_b:
0b05b2a4
PM
231 ptr = load_pointer(skb, k, 1, &tmp);
232 if (ptr != NULL) {
233 A = *(u8 *)ptr;
234 continue;
1da177e4 235 }
1198ad00 236 break;
1da177e4 237 case BPF_LD|BPF_W|BPF_LEN:
3154e540 238 A = skb->len;
1da177e4
LT
239 continue;
240 case BPF_LDX|BPF_W|BPF_LEN:
3154e540 241 X = skb->len;
1da177e4
LT
242 continue;
243 case BPF_LD|BPF_W|BPF_IND:
244 k = X + fentry->k;
245 goto load_w;
246 case BPF_LD|BPF_H|BPF_IND:
247 k = X + fentry->k;
248 goto load_h;
249 case BPF_LD|BPF_B|BPF_IND:
250 k = X + fentry->k;
251 goto load_b;
252 case BPF_LDX|BPF_B|BPF_MSH:
0b05b2a4
PM
253 ptr = load_pointer(skb, fentry->k, 1, &tmp);
254 if (ptr != NULL) {
255 X = (*(u8 *)ptr & 0xf) << 2;
256 continue;
257 }
258 return 0;
1da177e4
LT
259 case BPF_LD|BPF_IMM:
260 A = fentry->k;
261 continue;
262 case BPF_LDX|BPF_IMM:
263 X = fentry->k;
264 continue;
265 case BPF_LD|BPF_MEM:
266 A = mem[fentry->k];
267 continue;
268 case BPF_LDX|BPF_MEM:
269 X = mem[fentry->k];
270 continue;
271 case BPF_MISC|BPF_TAX:
272 X = A;
273 continue;
274 case BPF_MISC|BPF_TXA:
275 A = X;
276 continue;
277 case BPF_RET|BPF_K:
4bad4dc9 278 return fentry->k;
1da177e4 279 case BPF_RET|BPF_A:
4bad4dc9 280 return A;
1da177e4
LT
281 case BPF_ST:
282 mem[fentry->k] = A;
283 continue;
284 case BPF_STX:
285 mem[fentry->k] = X;
286 continue;
287 default:
93699863 288 WARN_ON(1);
1da177e4
LT
289 return 0;
290 }
291
292 /*
293 * Handle ancillary data, which are impossible
294 * (or very difficult) to get parsing packet contents.
295 */
296 switch (k-SKF_AD_OFF) {
297 case SKF_AD_PROTOCOL:
252e3346 298 A = ntohs(skb->protocol);
1da177e4
LT
299 continue;
300 case SKF_AD_PKTTYPE:
301 A = skb->pkt_type;
302 continue;
303 case SKF_AD_IFINDEX:
304 A = skb->dev->ifindex;
305 continue;
7e75f93e 306 case SKF_AD_MARK:
307 A = skb->mark;
308 continue;
d19742fb
ED
309 case SKF_AD_QUEUE:
310 A = skb->queue_mapping;
311 continue;
4738c1db
PM
312 case SKF_AD_NLATTR: {
313 struct nlattr *nla;
314
315 if (skb_is_nonlinear(skb))
316 return 0;
317 if (A > skb->len - sizeof(struct nlattr))
318 return 0;
319
320 nla = nla_find((struct nlattr *)&skb->data[A],
321 skb->len - A, X);
322 if (nla)
323 A = (void *)nla - (void *)skb->data;
324 else
325 A = 0;
326 continue;
327 }
d214c753
PNA
328 case SKF_AD_NLATTR_NEST: {
329 struct nlattr *nla;
330
331 if (skb_is_nonlinear(skb))
332 return 0;
333 if (A > skb->len - sizeof(struct nlattr))
334 return 0;
335
336 nla = (struct nlattr *)&skb->data[A];
337 if (nla->nla_len > A - skb->len)
338 return 0;
339
340 nla = nla_find_nested(nla, X);
341 if (nla)
342 A = (void *)nla - (void *)skb->data;
343 else
344 A = 0;
345 continue;
346 }
1da177e4
LT
347 default:
348 return 0;
349 }
350 }
351
352 return 0;
353}
b715631f 354EXPORT_SYMBOL(sk_run_filter);
1da177e4
LT
355
356/**
357 * sk_chk_filter - verify socket filter code
358 * @filter: filter to verify
359 * @flen: length of filter
360 *
361 * Check the user's filter code. If we let some ugly
362 * filter code slip through kaboom! The filter must contain
93699863
KK
363 * no references or jumps that are out of range, no illegal
364 * instructions, and must end with a RET instruction.
1da177e4 365 *
7b11f69f
KK
366 * All jumps are forward as they are not signed.
367 *
368 * Returns 0 if the rule set is legal or -EINVAL if not.
1da177e4
LT
369 */
370int sk_chk_filter(struct sock_filter *filter, int flen)
371{
372 struct sock_filter *ftest;
373 int pc;
374
1b93ae64 375 if (flen == 0 || flen > BPF_MAXINSNS)
1da177e4
LT
376 return -EINVAL;
377
378 /* check the filter code now */
379 for (pc = 0; pc < flen; pc++) {
1da177e4 380 ftest = &filter[pc];
1da177e4 381
93699863
KK
382 /* Only allow valid instructions */
383 switch (ftest->code) {
384 case BPF_ALU|BPF_ADD|BPF_K:
385 case BPF_ALU|BPF_ADD|BPF_X:
386 case BPF_ALU|BPF_SUB|BPF_K:
387 case BPF_ALU|BPF_SUB|BPF_X:
388 case BPF_ALU|BPF_MUL|BPF_K:
389 case BPF_ALU|BPF_MUL|BPF_X:
390 case BPF_ALU|BPF_DIV|BPF_X:
391 case BPF_ALU|BPF_AND|BPF_K:
392 case BPF_ALU|BPF_AND|BPF_X:
393 case BPF_ALU|BPF_OR|BPF_K:
394 case BPF_ALU|BPF_OR|BPF_X:
395 case BPF_ALU|BPF_LSH|BPF_K:
396 case BPF_ALU|BPF_LSH|BPF_X:
397 case BPF_ALU|BPF_RSH|BPF_K:
398 case BPF_ALU|BPF_RSH|BPF_X:
399 case BPF_ALU|BPF_NEG:
400 case BPF_LD|BPF_W|BPF_ABS:
401 case BPF_LD|BPF_H|BPF_ABS:
402 case BPF_LD|BPF_B|BPF_ABS:
403 case BPF_LD|BPF_W|BPF_LEN:
404 case BPF_LD|BPF_W|BPF_IND:
405 case BPF_LD|BPF_H|BPF_IND:
406 case BPF_LD|BPF_B|BPF_IND:
407 case BPF_LD|BPF_IMM:
408 case BPF_LDX|BPF_W|BPF_LEN:
409 case BPF_LDX|BPF_B|BPF_MSH:
410 case BPF_LDX|BPF_IMM:
411 case BPF_MISC|BPF_TAX:
412 case BPF_MISC|BPF_TXA:
413 case BPF_RET|BPF_K:
414 case BPF_RET|BPF_A:
415 break;
416
417 /* Some instructions need special checks */
fb0d366b 418
93699863
KK
419 case BPF_ALU|BPF_DIV|BPF_K:
420 /* check for division by zero */
421 if (ftest->k == 0)
1da177e4 422 return -EINVAL;
93699863
KK
423 break;
424
425 case BPF_LD|BPF_MEM:
426 case BPF_LDX|BPF_MEM:
427 case BPF_ST:
428 case BPF_STX:
429 /* check for invalid memory addresses */
430 if (ftest->k >= BPF_MEMWORDS)
431 return -EINVAL;
432 break;
433
434 case BPF_JMP|BPF_JA:
435 /*
436 * Note, the large ftest->k might cause loops.
437 * Compare this with conditional jumps below,
438 * where offsets are limited. --ANK (981016)
439 */
440 if (ftest->k >= (unsigned)(flen-pc-1))
441 return -EINVAL;
442 break;
443
444 case BPF_JMP|BPF_JEQ|BPF_K:
445 case BPF_JMP|BPF_JEQ|BPF_X:
446 case BPF_JMP|BPF_JGE|BPF_K:
447 case BPF_JMP|BPF_JGE|BPF_X:
448 case BPF_JMP|BPF_JGT|BPF_K:
449 case BPF_JMP|BPF_JGT|BPF_X:
450 case BPF_JMP|BPF_JSET|BPF_K:
451 case BPF_JMP|BPF_JSET|BPF_X:
452 /* for conditionals both must be safe */
e35bedf3 453 if (pc + ftest->jt + 1 >= flen ||
93699863
KK
454 pc + ftest->jf + 1 >= flen)
455 return -EINVAL;
456 break;
457
458 default:
459 return -EINVAL;
1da177e4
LT
460 }
461 }
462
e35bedf3 463 return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
1da177e4 464}
b715631f 465EXPORT_SYMBOL(sk_chk_filter);
1da177e4 466
47e958ea
PE
467/**
468 * sk_filter_rcu_release: Release a socket filter by rcu_head
469 * @rcu: rcu_head that contains the sk_filter to free
470 */
471static void sk_filter_rcu_release(struct rcu_head *rcu)
472{
473 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
474
475 sk_filter_release(fp);
476}
477
478static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
479{
480 unsigned int size = sk_filter_len(fp);
481
482 atomic_sub(size, &sk->sk_omem_alloc);
483 call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
484}
485
1da177e4
LT
486/**
487 * sk_attach_filter - attach a socket filter
488 * @fprog: the filter program
489 * @sk: the socket to use
490 *
491 * Attach the user's filter code. We first run some sanity checks on
492 * it to make sure it does not explode on us later. If an error
493 * occurs or there is insufficient memory for the filter a negative
494 * errno code is returned. On success the return is zero.
495 */
496int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
497{
d3904b73 498 struct sk_filter *fp, *old_fp;
1da177e4
LT
499 unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
500 int err;
501
502 /* Make sure new filter is there and in the right amounts. */
e35bedf3
KK
503 if (fprog->filter == NULL)
504 return -EINVAL;
1da177e4
LT
505
506 fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
507 if (!fp)
508 return -ENOMEM;
509 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
4ec93edb 510 sock_kfree_s(sk, fp, fsize+sizeof(*fp));
1da177e4
LT
511 return -EFAULT;
512 }
513
514 atomic_set(&fp->refcnt, 1);
515 fp->len = fprog->len;
516
517 err = sk_chk_filter(fp->insns, fp->len);
d3904b73
PE
518 if (err) {
519 sk_filter_uncharge(sk, fp);
520 return err;
1da177e4
LT
521 }
522
d3904b73 523 rcu_read_lock_bh();
a898def2 524 old_fp = rcu_dereference_bh(sk->sk_filter);
d3904b73
PE
525 rcu_assign_pointer(sk->sk_filter, fp);
526 rcu_read_unlock_bh();
527
9b013e05
OJ
528 if (old_fp)
529 sk_filter_delayed_uncharge(sk, old_fp);
d3904b73 530 return 0;
1da177e4 531}
5ff3f073 532EXPORT_SYMBOL_GPL(sk_attach_filter);
1da177e4 533
55b33325
PE
534int sk_detach_filter(struct sock *sk)
535{
536 int ret = -ENOENT;
537 struct sk_filter *filter;
538
539 rcu_read_lock_bh();
a898def2 540 filter = rcu_dereference_bh(sk->sk_filter);
55b33325
PE
541 if (filter) {
542 rcu_assign_pointer(sk->sk_filter, NULL);
47e958ea 543 sk_filter_delayed_uncharge(sk, filter);
55b33325
PE
544 ret = 0;
545 }
546 rcu_read_unlock_bh();
547 return ret;
548}
5ff3f073 549EXPORT_SYMBOL_GPL(sk_detach_filter);