]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/sched/cls_rsvp.h
net: Move && and || to end of previous line
[net-next-2.6.git] / net / sched / cls_rsvp.h
CommitLineData
1da177e4
LT
1/*
2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
12/*
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
15
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
21
22 IMPLEMENTATION.
23
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
28
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
34
35
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
38
39
40 NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
43 should be wildcard.
44
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
57
58
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
63
64
65 Well, as result, despite its simplicity, we get a pretty
66 powerful classification engine. */
67
1da177e4
LT
68
69struct rsvp_head
70{
71 u32 tmap[256/32];
72 u32 hgenerator;
73 u8 tgenerator;
74 struct rsvp_session *ht[256];
75};
76
77struct rsvp_session
78{
79 struct rsvp_session *next;
66c6f529 80 __be32 dst[RSVP_DST_LEN];
1da177e4
LT
81 struct tc_rsvp_gpi dpi;
82 u8 protocol;
83 u8 tunnelid;
84 /* 16 (src,sport) hash slots, and one wildcard source slot */
85 struct rsvp_filter *ht[16+1];
86};
87
88
89struct rsvp_filter
90{
91 struct rsvp_filter *next;
66c6f529 92 __be32 src[RSVP_DST_LEN];
1da177e4
LT
93 struct tc_rsvp_gpi spi;
94 u8 tunnelhdr;
95
96 struct tcf_result res;
97 struct tcf_exts exts;
98
99 u32 handle;
100 struct rsvp_session *sess;
101};
102
66c6f529 103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
1da177e4 104{
66c6f529 105 unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
1da177e4
LT
106 h ^= h>>16;
107 h ^= h>>8;
108 return (h ^ protocol ^ tunnelid) & 0xFF;
109}
110
66c6f529 111static __inline__ unsigned hash_src(__be32 *src)
1da177e4 112{
66c6f529 113 unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
1da177e4
LT
114 h ^= h>>16;
115 h ^= h>>8;
116 h ^= h>>4;
117 return h & 0xF;
118}
119
120static struct tcf_ext_map rsvp_ext_map = {
121 .police = TCA_RSVP_POLICE,
122 .action = TCA_RSVP_ACT
123};
124
/*
 * Run the extensions of the current filter.  Expects `skb`, `f` and `res`
 * to be in scope and must be expanded inside a loop over filters:
 *   - negative verdict: skip to the next filter (`continue`),
 *   - positive verdict: return it from the enclosing function,
 *   - zero: fall through to the code following the macro.
 *
 * This is deliberately a bare brace block and not do { } while (0):
 * the `continue` has to bind to the caller's loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
	if (verdict < 0)					\
		continue;					\
	if (verdict > 0)					\
		return verdict;					\
}
10297b99 133
1da177e4
LT
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135 struct tcf_result *res)
136{
137 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138 struct rsvp_session *s;
139 struct rsvp_filter *f;
140 unsigned h1, h2;
66c6f529 141 __be32 *dst, *src;
1da177e4
LT
142 u8 protocol;
143 u8 tunnelid = 0;
144 u8 *xprt;
145#if RSVP_DST_LEN == 4
0660e03f 146 struct ipv6hdr *nhptr = ipv6_hdr(skb);
1da177e4 147#else
eddc9ec5 148 struct iphdr *nhptr = ip_hdr(skb);
1da177e4
LT
149#endif
150
151restart:
152
153#if RSVP_DST_LEN == 4
154 src = &nhptr->saddr.s6_addr32[0];
155 dst = &nhptr->daddr.s6_addr32[0];
156 protocol = nhptr->nexthdr;
157 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
158#else
159 src = &nhptr->saddr;
160 dst = &nhptr->daddr;
161 protocol = nhptr->protocol;
162 xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
b6d9bcb0 163 if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
1da177e4
LT
164 return -1;
165#endif
166
167 h1 = hash_dst(dst, protocol, tunnelid);
168 h2 = hash_src(src);
169
170 for (s = sht[h1]; s; s = s->next) {
171 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
172 protocol == s->protocol &&
f64f9e71
JP
173 !(s->dpi.mask &
174 (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
1da177e4 175#if RSVP_DST_LEN == 4
f64f9e71
JP
176 dst[0] == s->dst[0] &&
177 dst[1] == s->dst[1] &&
178 dst[2] == s->dst[2] &&
1da177e4 179#endif
f64f9e71 180 tunnelid == s->tunnelid) {
1da177e4
LT
181
182 for (f = s->ht[h2]; f; f = f->next) {
183 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
184 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
185#if RSVP_DST_LEN == 4
f64f9e71
JP
186 &&
187 src[0] == f->src[0] &&
188 src[1] == f->src[1] &&
189 src[2] == f->src[2]
1da177e4
LT
190#endif
191 ) {
192 *res = f->res;
193 RSVP_APPLY_RESULT();
194
195matched:
196 if (f->tunnelhdr == 0)
197 return 0;
198
199 tunnelid = f->res.classid;
200 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
201 goto restart;
202 }
203 }
204
205 /* And wildcard bucket... */
206 for (f = s->ht[16]; f; f = f->next) {
207 *res = f->res;
208 RSVP_APPLY_RESULT();
209 goto matched;
210 }
211 return -1;
212 }
213 }
214 return -1;
215}
216
217static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
218{
219 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
220 struct rsvp_session *s;
221 struct rsvp_filter *f;
222 unsigned h1 = handle&0xFF;
223 unsigned h2 = (handle>>8)&0xFF;
224
225 if (h2 > 16)
226 return 0;
227
228 for (s = sht[h1]; s; s = s->next) {
229 for (f = s->ht[h2]; f; f = f->next) {
230 if (f->handle == handle)
231 return (unsigned long)f;
232 }
233 }
234 return 0;
235}
236
/* Counterpart of rsvp_get(); intentionally does nothing. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* empty */
}
240
241static int rsvp_init(struct tcf_proto *tp)
242{
243 struct rsvp_head *data;
244
0da974f4 245 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
1da177e4 246 if (data) {
1da177e4
LT
247 tp->root = data;
248 return 0;
249 }
250 return -ENOBUFS;
251}
252
253static inline void
254rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
255{
256 tcf_unbind_filter(tp, &f->res);
257 tcf_exts_destroy(tp, &f->exts);
258 kfree(f);
259}
260
261static void rsvp_destroy(struct tcf_proto *tp)
262{
263 struct rsvp_head *data = xchg(&tp->root, NULL);
264 struct rsvp_session **sht;
265 int h1, h2;
266
267 if (data == NULL)
268 return;
269
270 sht = data->ht;
271
272 for (h1=0; h1<256; h1++) {
273 struct rsvp_session *s;
274
275 while ((s = sht[h1]) != NULL) {
276 sht[h1] = s->next;
277
278 for (h2=0; h2<=16; h2++) {
279 struct rsvp_filter *f;
280
281 while ((f = s->ht[h2]) != NULL) {
282 s->ht[h2] = f->next;
283 rsvp_delete_filter(tp, f);
284 }
285 }
286 kfree(s);
287 }
288 }
289 kfree(data);
290}
291
292static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
293{
294 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
295 unsigned h = f->handle;
296 struct rsvp_session **sp;
297 struct rsvp_session *s = f->sess;
298 int i;
299
300 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
301 if (*fp == f) {
302 tcf_tree_lock(tp);
303 *fp = f->next;
304 tcf_tree_unlock(tp);
305 rsvp_delete_filter(tp, f);
306
307 /* Strip tree */
308
309 for (i=0; i<=16; i++)
310 if (s->ht[i])
311 return 0;
312
313 /* OK, session has no flows */
314 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
315 *sp; sp = &(*sp)->next) {
316 if (*sp == s) {
317 tcf_tree_lock(tp);
318 *sp = s->next;
319 tcf_tree_unlock(tp);
320
321 kfree(s);
322 return 0;
323 }
324 }
325
326 return 0;
327 }
328 }
329 return 0;
330}
331
332static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
333{
334 struct rsvp_head *data = tp->root;
335 int i = 0xFFFF;
336
337 while (i-- > 0) {
338 u32 h;
339 if ((data->hgenerator += 0x10000) == 0)
340 data->hgenerator = 0x10000;
341 h = data->hgenerator|salt;
342 if (rsvp_get(tp, h) == 0)
343 return h;
344 }
345 return 0;
346}
347
348static int tunnel_bts(struct rsvp_head *data)
349{
350 int n = data->tgenerator>>5;
351 u32 b = 1<<(data->tgenerator&0x1F);
10297b99 352
1da177e4
LT
353 if (data->tmap[n]&b)
354 return 0;
355 data->tmap[n] |= b;
356 return 1;
357}
358
359static void tunnel_recycle(struct rsvp_head *data)
360{
361 struct rsvp_session **sht = data->ht;
362 u32 tmap[256/32];
363 int h1, h2;
364
365 memset(tmap, 0, sizeof(tmap));
366
367 for (h1=0; h1<256; h1++) {
368 struct rsvp_session *s;
369 for (s = sht[h1]; s; s = s->next) {
370 for (h2=0; h2<=16; h2++) {
371 struct rsvp_filter *f;
372
373 for (f = s->ht[h2]; f; f = f->next) {
374 if (f->tunnelhdr == 0)
375 continue;
376 data->tgenerator = f->res.classid;
377 tunnel_bts(data);
378 }
379 }
380 }
381 }
382
383 memcpy(data->tmap, tmap, sizeof(tmap));
384}
385
386static u32 gen_tunnel(struct rsvp_head *data)
387{
388 int i, k;
389
390 for (k=0; k<2; k++) {
391 for (i=255; i>0; i--) {
392 if (++data->tgenerator == 0)
393 data->tgenerator = 1;
394 if (tunnel_bts(data))
395 return data->tgenerator;
396 }
397 tunnel_recycle(data);
398 }
399 return 0;
400}
401
6fa8c014
PM
402static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
403 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
404 [TCA_RSVP_DST] = { .type = NLA_BINARY,
405 .len = RSVP_DST_LEN * sizeof(u32) },
406 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
407 .len = RSVP_DST_LEN * sizeof(u32) },
408 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
409};
410
1da177e4
LT
411static int rsvp_change(struct tcf_proto *tp, unsigned long base,
412 u32 handle,
add93b61 413 struct nlattr **tca,
1da177e4
LT
414 unsigned long *arg)
415{
416 struct rsvp_head *data = tp->root;
417 struct rsvp_filter *f, **fp;
418 struct rsvp_session *s, **sp;
419 struct tc_rsvp_pinfo *pinfo = NULL;
add93b61
PM
420 struct nlattr *opt = tca[TCA_OPTIONS-1];
421 struct nlattr *tb[TCA_RSVP_MAX + 1];
1da177e4
LT
422 struct tcf_exts e;
423 unsigned h1, h2;
66c6f529 424 __be32 *dst;
1da177e4
LT
425 int err;
426
427 if (opt == NULL)
428 return handle ? -EINVAL : 0;
429
6fa8c014 430 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
cee63723
PM
431 if (err < 0)
432 return err;
1da177e4
LT
433
434 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
435 if (err < 0)
436 return err;
437
438 if ((f = (struct rsvp_filter*)*arg) != NULL) {
439 /* Node exists: adjust only classid */
440
441 if (f->handle != handle && handle)
442 goto errout2;
443 if (tb[TCA_RSVP_CLASSID-1]) {
1587bac4 444 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
1da177e4
LT
445 tcf_bind_filter(tp, &f->res, base);
446 }
447
448 tcf_exts_change(tp, &f->exts, &e);
449 return 0;
450 }
451
452 /* Now more serious part... */
453 err = -EINVAL;
454 if (handle)
455 goto errout2;
456 if (tb[TCA_RSVP_DST-1] == NULL)
457 goto errout2;
458
459 err = -ENOBUFS;
0da974f4 460 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
1da177e4
LT
461 if (f == NULL)
462 goto errout2;
463
1da177e4
LT
464 h2 = 16;
465 if (tb[TCA_RSVP_SRC-1]) {
add93b61 466 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
1da177e4
LT
467 h2 = hash_src(f->src);
468 }
469 if (tb[TCA_RSVP_PINFO-1]) {
add93b61 470 pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
1da177e4
LT
471 f->spi = pinfo->spi;
472 f->tunnelhdr = pinfo->tunnelhdr;
473 }
6fa8c014 474 if (tb[TCA_RSVP_CLASSID-1])
1587bac4 475 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
1da177e4 476
add93b61 477 dst = nla_data(tb[TCA_RSVP_DST-1]);
1da177e4
LT
478 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
479
480 err = -ENOMEM;
481 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
482 goto errout;
483
484 if (f->tunnelhdr) {
485 err = -EINVAL;
486 if (f->res.classid > 255)
487 goto errout;
488
489 err = -ENOMEM;
490 if (f->res.classid == 0 &&
491 (f->res.classid = gen_tunnel(data)) == 0)
492 goto errout;
493 }
494
495 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
496 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
497 pinfo && pinfo->protocol == s->protocol &&
f64f9e71 498 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
1da177e4 499#if RSVP_DST_LEN == 4
f64f9e71
JP
500 dst[0] == s->dst[0] &&
501 dst[1] == s->dst[1] &&
502 dst[2] == s->dst[2] &&
1da177e4 503#endif
f64f9e71 504 pinfo->tunnelid == s->tunnelid) {
1da177e4
LT
505
506insert:
507 /* OK, we found appropriate session */
508
509 fp = &s->ht[h2];
510
511 f->sess = s;
512 if (f->tunnelhdr == 0)
513 tcf_bind_filter(tp, &f->res, base);
514
515 tcf_exts_change(tp, &f->exts, &e);
516
517 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
518 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
519 break;
520 f->next = *fp;
521 wmb();
522 *fp = f;
523
524 *arg = (unsigned long)f;
525 return 0;
526 }
527 }
528
529 /* No session found. Create new one. */
530
531 err = -ENOBUFS;
0da974f4 532 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
1da177e4
LT
533 if (s == NULL)
534 goto errout;
1da177e4
LT
535 memcpy(s->dst, dst, sizeof(s->dst));
536
537 if (pinfo) {
538 s->dpi = pinfo->dpi;
539 s->protocol = pinfo->protocol;
540 s->tunnelid = pinfo->tunnelid;
541 }
542 for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
543 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
544 break;
545 }
546 s->next = *sp;
547 wmb();
548 *sp = s;
10297b99 549
1da177e4
LT
550 goto insert;
551
552errout:
a51482bd 553 kfree(f);
1da177e4
LT
554errout2:
555 tcf_exts_destroy(tp, &e);
556 return err;
557}
558
559static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
560{
561 struct rsvp_head *head = tp->root;
562 unsigned h, h1;
563
564 if (arg->stop)
565 return;
566
567 for (h = 0; h < 256; h++) {
568 struct rsvp_session *s;
569
570 for (s = head->ht[h]; s; s = s->next) {
571 for (h1 = 0; h1 <= 16; h1++) {
572 struct rsvp_filter *f;
573
574 for (f = s->ht[h1]; f; f = f->next) {
575 if (arg->count < arg->skip) {
576 arg->count++;
577 continue;
578 }
579 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
580 arg->stop = 1;
581 return;
582 }
583 arg->count++;
584 }
585 }
586 }
587 }
588}
589
590static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
591 struct sk_buff *skb, struct tcmsg *t)
592{
593 struct rsvp_filter *f = (struct rsvp_filter*)fh;
594 struct rsvp_session *s;
27a884dc 595 unsigned char *b = skb_tail_pointer(skb);
4b3550ef 596 struct nlattr *nest;
1da177e4
LT
597 struct tc_rsvp_pinfo pinfo;
598
599 if (f == NULL)
600 return skb->len;
601 s = f->sess;
602
603 t->tcm_handle = f->handle;
604
4b3550ef
PM
605 nest = nla_nest_start(skb, TCA_OPTIONS);
606 if (nest == NULL)
607 goto nla_put_failure;
1da177e4 608
add93b61 609 NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
1da177e4
LT
610 pinfo.dpi = s->dpi;
611 pinfo.spi = f->spi;
612 pinfo.protocol = s->protocol;
613 pinfo.tunnelid = s->tunnelid;
614 pinfo.tunnelhdr = f->tunnelhdr;
8a47077a 615 pinfo.pad = 0;
add93b61 616 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
1da177e4 617 if (f->res.classid)
24beeab5 618 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
1da177e4 619 if (((f->handle>>8)&0xFF) != 16)
add93b61 620 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
1da177e4
LT
621
622 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
add93b61 623 goto nla_put_failure;
1da177e4 624
4b3550ef 625 nla_nest_end(skb, nest);
1da177e4
LT
626
627 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
add93b61 628 goto nla_put_failure;
1da177e4
LT
629 return skb->len;
630
add93b61 631nla_put_failure:
dc5fc579 632 nlmsg_trim(skb, b);
1da177e4
LT
633 return -1;
634}
635
636static struct tcf_proto_ops RSVP_OPS = {
637 .next = NULL,
638 .kind = RSVP_ID,
639 .classify = rsvp_classify,
640 .init = rsvp_init,
641 .destroy = rsvp_destroy,
642 .get = rsvp_get,
643 .put = rsvp_put,
644 .change = rsvp_change,
645 .delete = rsvp_delete,
646 .walk = rsvp_walk,
647 .dump = rsvp_dump,
648 .owner = THIS_MODULE,
649};
650
651static int __init init_rsvp(void)
652{
653 return register_tcf_proto_ops(&RSVP_OPS);
654}
655
10297b99 656static void __exit exit_rsvp(void)
1da177e4
LT
657{
658 unregister_tcf_proto_ops(&RSVP_OPS);
659}
660
661module_init(init_rsvp)
662module_exit(exit_rsvp)