/*
 * net/sched/sch_cbq.c  Class-Based Queueing discipline.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* Class-Based Queueing (CBQ) algorithm.
   =======================================

   Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
                Management Models for Packet Networks",
                IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

            [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

            [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
                Parameters", 1996

            [4] Sally Floyd and Michael Speer, "Experimental Results
                for Class-Based Queueing", 1998, not published.

   -----------------------------------------------------------------------

   The algorithm skeleton was taken from the NS simulator cbq.cc.
   Anyone who wants to check this code against the LBL version
   should take into account that ONLY the skeleton was borrowed;
   the implementation is different. Particularly:

   --- The WRR algorithm is different. Our version looks more
   reasonable (I hope) and works when quanta are allowed to be
   less than the MTU, which is always the case when real-time classes
   have small rates. Note that the statement in [3] is
   incomplete: delay may actually be estimated even if the class
   per-round allotment is less than the MTU. Namely, if the per-round
   allotment is W*r_i, and r_1+...+r_k = r < 1, then

   delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

   In the worst case we have an IntServ estimate with D = W*r+k*MTU
   and C = MTU*r. The proof (if correct at all) is trivial.

   --- It seems that cbq-2.0 is not very accurate. At least, I cannot
   interpret some places, which look like wrong translations
   from NS. Anyone is advised to find these differences
   and explain to me why I am wrong 8).

   --- Linux has no EOI event, so we cannot estimate true class
   idle time. The workaround is to treat the next dequeue event
   as a sign that the previous packet has finished. This is wrong
   because of internal device queueing, but on a permanently loaded
   link it is true. Moreover, combined with the clock integrator,
   this scheme looks very close to an ideal solution.  */
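
/* A worked instance of the bound above (illustrative numbers only,
   not taken from any real configuration): take B = 10 Mbit/s,
   MTU = 1500 bytes, k = 2 classes with r_1 = 0.4 and r_2 = 0.1
   (so r = 0.5), and a round factor W chosen so that class 2's
   per-round allotment is W*r_2 = 750 bytes, i.e. half an MTU
   (hence W = 7500 and W*r = 3750 bytes). Then for class 2:

       [MTU/(W*r_2)] = 1500/750 = 2 rounds,
       delay_2 <= (2*3750 + 3750 + 2*1500) bytes / B
                = 14250*8 bits / 10^7 bit/s  ~= 11.4 ms

   so the bound stays finite even though the class's own per-round
   allotment (750 bytes) is below the MTU, which is the point of the
   remark against [3]. */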

struct cbq_sched_data;


struct cbq_class
{
        struct Qdisc_class_common common;
        struct cbq_class        *next_alive;    /* next class with backlog in this priority band */

/* Parameters */
        unsigned char           priority;       /* class priority */
        unsigned char           priority2;      /* priority to be used after overlimit */
        unsigned char           ewma_log;       /* time constant for idle time calculation */
        unsigned char           ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
        unsigned char           police;
#endif

        u32                     defmap;

        /* Link-sharing scheduler parameters */
        long                    maxidle;        /* Class parameters: see below. */
        long                    offtime;
        long                    minidle;
        u32                     avpkt;
        struct qdisc_rate_table *R_tab;

        /* Overlimit strategy parameters */
        void                    (*overlimit)(struct cbq_class *cl);
        psched_tdiff_t          penalty;

        /* General scheduler (WRR) parameters */
        long                    allot;
        long                    quantum;        /* Allotment per WRR round */
        long                    weight;         /* Relative allotment: see below */

        struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
        struct cbq_class        *split;         /* Ptr to split node */
        struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
        struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
        struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
                                                   parent otherwise */
        struct cbq_class        *sibling;       /* Sibling chain */
        struct cbq_class        *children;      /* Pointer to children chain */

        struct Qdisc            *q;             /* Elementary queueing discipline */


/* Variables */
        unsigned char           cpriority;      /* Effective priority */
        unsigned char           delayed;
        unsigned char           level;          /* level of the class in hierarchy:
                                                   0 for leaf classes, and maximal
                                                   level of children + 1 for nodes.
                                                 */

        psched_time_t           last;           /* Last end of service */
        psched_time_t           undertime;
        long                    avgidle;
        long                    deficit;        /* Saved deficit for WRR */
        psched_time_t           penalized;
        struct gnet_stats_basic bstats;
        struct gnet_stats_queue qstats;
        struct gnet_stats_rate_est rate_est;
        struct tc_cbq_xstats    xstats;

        struct tcf_proto        *filter_list;

        int                     refcnt;
        int                     filters;

        struct cbq_class        *defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
        struct Qdisc_class_hash clhash;         /* Hash table of all classes */
        int                     nclasses[TC_CBQ_MAXPRIO+1];
        unsigned                quanta[TC_CBQ_MAXPRIO+1];

        struct cbq_class        link;

        unsigned                activemask;
        struct cbq_class        *active[TC_CBQ_MAXPRIO+1];     /* List of all classes
                                                                  with backlog */

#ifdef CONFIG_NET_CLS_ACT
        struct cbq_class        *rx_class;
#endif
        struct cbq_class        *tx_class;
        struct cbq_class        *tx_borrowed;
        int                     tx_len;
        psched_time_t           now;            /* Cached timestamp */
        psched_time_t           now_rt;         /* Cached real time */
        unsigned                pmask;

        struct hrtimer          delay_timer;
        struct qdisc_watchdog   watchdog;       /* Watchdog timer,
                                                   started when CBQ has
                                                   backlog, but cannot
                                                   transmit just now */
        psched_tdiff_t          wd_expires;
        int                     toplevel;
        u32                     hgenerator;
};

#define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
        struct Qdisc_class_common *clc;

        clc = qdisc_class_find(&q->clhash, classid);
        if (clc == NULL)
                return NULL;
        return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
        struct cbq_class *cl, *new;

        for (cl = this->tparent; cl; cl = cl->tparent)
                if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
                        return new;

        return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (e.g. route based) at the
   root of CBQ, so that it resolves to split nodes. Then packets are
   classified by logical priority, or a more specific classifier may
   be attached to the split node.
 */
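
/* Illustrative configuration sketch (assumed device name and numbers,
   standard tc(8) cbq options) for a split node with a defmap, so that
   otherwise-unclassified traffic falls back by logical priority:

       tc qdisc add dev eth0 root handle 1: cbq \
               bandwidth 10Mbit avpkt 1000
       tc class add dev eth0 parent 1: classid 1:1 cbq \
               bandwidth 10Mbit rate 1Mbit allot 1514 \
               prio 5 avpkt 1000 split 1:0 defmap c0
       tc class add dev eth0 parent 1: classid 1:2 cbq \
               bandwidth 10Mbit rate 8Mbit allot 1514 \
               prio 7 avpkt 1000 split 1:0 defmap 3f

   Here "defmap" is the bitmap matched against skb->priority
   (TC_PRIO_*) in step 2+n below. */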

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *head = &q->link;
        struct cbq_class **defmap;
        struct cbq_class *cl = NULL;
        u32 prio = skb->priority;
        struct tcf_result res;

        /*
         *  Step 1. If skb->priority points to one of our classes, use it.
         */
        if (TC_H_MAJ(prio^sch->handle) == 0 &&
            (cl = cbq_class_lookup(q, prio)) != NULL)
                return cl;

        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        for (;;) {
                int result = 0;
                defmap = head->defaults;

                /*
                 * Step 2+n. Apply classifier.
                 */
                if (!head->filter_list ||
                    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
                        goto fallback;

                if ((cl = (void*)res.class) == NULL) {
                        if (TC_H_MAJ(res.classid))
                                cl = cbq_class_lookup(q, res.classid);
                        else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
                                cl = defmap[TC_PRIO_BESTEFFORT];

                        if (cl == NULL || cl->level >= head->level)
                                goto fallback;
                }

#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
                case TC_ACT_STOLEN:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                case TC_ACT_SHOT:
                        return NULL;
                case TC_ACT_RECLASSIFY:
                        return cbq_reclassify(skb, cl);
                }
#endif
                if (cl->level == 0)
                        return cl;

                /*
                 * Step 3+n. If the classifier selected a link-sharing class,
                 * apply the agency-specific classifier.
                 * Repeat this procedure until we hit a leaf node.
                 */
                head = cl;
        }

fallback:
        cl = head;

        /*
         * Step 4. No success...
         */
        if (TC_H_MAJ(prio) == 0 &&
            !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
            !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
                return head;

        return cl;
}

/*
   A packet has just been enqueued to an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        int prio = cl->cpriority;
        struct cbq_class *cl_tail;

        cl_tail = q->active[prio];
        q->active[prio] = cl;

        if (cl_tail != NULL) {
                cl->next_alive = cl_tail->next_alive;
                cl_tail->next_alive = cl;
        } else {
                cl->next_alive = cl;
                q->activemask |= (1<<prio);
        }
}

/*
   Unlink class from the active chain.
   Note that the same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
        int prio = this->cpriority;
        struct cbq_class *cl;
        struct cbq_class *cl_prev = q->active[prio];

        do {
                cl = cl_prev->next_alive;
                if (cl == this) {
                        cl_prev->next_alive = cl->next_alive;
                        cl->next_alive = NULL;

                        if (cl == q->active[prio]) {
                                q->active[prio] = cl_prev;
                                if (cl == q->active[prio]) {
                                        q->active[prio] = NULL;
                                        q->activemask &= ~(1<<prio);
                                        return;
                                }
                        }
                        return;
                }
        } while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
        int toplevel = q->toplevel;

        if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
                psched_time_t now;
                psched_tdiff_t incr;

                now = psched_get_time();
                incr = now - q->now_rt;
                now = q->now + incr;

                do {
                        if (cl->undertime < now) {
                                q->toplevel = cl->level;
                                return;
                        }
                } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
        }
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        int uninitialized_var(ret);
        struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
        q->rx_class = cl;
#endif
        if (cl == NULL) {
                if (ret & __NET_XMIT_BYPASS)
                        sch->qstats.drops++;
                kfree_skb(skb);
                return ret;
        }

#ifdef CONFIG_NET_CLS_ACT
        cl->q->__parent = sch;
#endif
        ret = qdisc_enqueue(skb, cl->q);
        if (ret == NET_XMIT_SUCCESS) {
                sch->q.qlen++;
                sch->bstats.packets++;
                sch->bstats.bytes += qdisc_pkt_len(skb);
                cbq_mark_toplevel(q, cl);
                if (!cl->next_alive)
                        cbq_activate_class(cl);
                return ret;
        }

        if (net_xmit_drop_count(ret)) {
                sch->qstats.drops++;
                cbq_mark_toplevel(q, cl);
                cl->qstats.drops++;
        }
        return ret;
}

static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        int ret;

        if ((cl = q->tx_class) == NULL) {
                kfree_skb(skb);
                sch->qstats.drops++;
                return NET_XMIT_CN;
        }
        q->tx_class = NULL;

        cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_ACT
        q->rx_class = cl;
        cl->q->__parent = sch;
#endif
        if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
                sch->q.qlen++;
                sch->qstats.requeues++;
                if (!cl->next_alive)
                        cbq_activate_class(cl);
                return 0;
        }
        if (net_xmit_drop_count(ret)) {
                sch->qstats.drops++;
                cl->qstats.drops++;
        }
        return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        psched_tdiff_t delay = cl->undertime - q->now;

        if (!cl->delayed) {
                delay += cl->offtime;

                /*
                   The class goes to sleep, so that it will have no
                   chance to work off avgidle. Let's forgive it 8)

                   BTW cbq-2.0 has a bug here; apparently they forgot
                   to shift this by cl->ewma_log.
                 */
                if (cl->avgidle < 0)
                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
                if (cl->avgidle < cl->minidle)
                        cl->avgidle = cl->minidle;
                if (delay <= 0)
                        delay = 1;
                cl->undertime = q->now + delay;

                cl->xstats.overactions++;
                cl->delayed = 1;
        }
        if (q->wd_expires == 0 || q->wd_expires > delay)
                q->wd_expires = delay;

        /* Dirty work! We must schedule wakeups based on the
           real available rate, rather than the leaf rate,
           which may be tiny (even zero).
         */
        if (q->toplevel == TC_CBQ_MAXLEVEL) {
                struct cbq_class *b;
                psched_tdiff_t base_delay = q->wd_expires;

                for (b = cl->borrow; b; b = b->borrow) {
                        delay = b->undertime - q->now;
                        if (delay < base_delay) {
                                if (delay <= 0)
                                        delay = 1;
                                base_delay = delay;
                        }
                }

                q->wd_expires = base_delay;
        }
}
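
/* Numeric illustration of the forgiveness term above (assumed values):
   with cl->ewma_log = 5 (W = 2^-5) and cl->avgidle = -3200 ticks,

       (-avgidle) - ((-avgidle) >> ewma_log) = 3200 - 100 = 3100,

   i.e. delay is reduced by (1 - W)*(-avgidle) = 3100 ticks, which is
   exactly the catch-up idle time the class would otherwise need to
   bring true_avgidle back to zero (see the matching formula in
   cbq_update below). */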

/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
   when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *this = cl;

        do {
                if (cl->level > q->toplevel) {
                        cl = NULL;
                        break;
                }
        } while ((cl = cl->borrow) != NULL);

        if (cl == NULL)
                cl = this;
        cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        psched_tdiff_t delay = cl->undertime - q->now;

        if (test_bit(__QDISC_STATE_DEACTIVATED,
                     &qdisc_root_sleeping(cl->qdisc)->state))
                return;

        if (!cl->delayed) {
                psched_time_t sched = q->now;
                ktime_t expires;

                delay += cl->offtime;
                if (cl->avgidle < 0)
                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
                if (cl->avgidle < cl->minidle)
                        cl->avgidle = cl->minidle;
                cl->undertime = q->now + delay;

                if (delay > 0) {
                        sched += delay + cl->penalty;
                        cl->penalized = sched;
                        cl->cpriority = TC_CBQ_MAXPRIO;
                        q->pmask |= (1<<TC_CBQ_MAXPRIO);

                        expires = ktime_set(0, 0);
                        expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
                        if (hrtimer_try_to_cancel(&q->delay_timer) &&
                            ktime_to_ns(ktime_sub(q->delay_timer.expires,
                                                  expires)) > 0)
                                q->delay_timer.expires = expires;
                        hrtimer_restart(&q->delay_timer);
                        cl->delayed = 1;
                        cl->xstats.overactions++;
                        return;
                }
                delay = 1;
        }
        if (q->wd_expires == 0 || q->wd_expires > delay)
                q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

        cl->penalized = q->now + cl->penalty;

        if (cl->cpriority != cl->priority2) {
                cl->cpriority = cl->priority2;
                q->pmask |= (1<<cl->cpriority);
                cl->xstats.overactions++;
        }
        cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
        if (cl->q->ops->drop)
                if (cl->q->ops->drop(cl->q))
                        cl->qdisc->q.qlen--;
        cl->xstats.overactions++;
        cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
                                       psched_time_t now)
{
        struct cbq_class *cl;
        struct cbq_class *cl_prev = q->active[prio];
        psched_time_t sched = now;

        if (cl_prev == NULL)
                return 0;

        do {
                cl = cl_prev->next_alive;
                if (now - cl->penalized > 0) {
                        cl_prev->next_alive = cl->next_alive;
                        cl->next_alive = NULL;
                        cl->cpriority = cl->priority;
                        cl->delayed = 0;
                        cbq_activate_class(cl);

                        if (cl == q->active[prio]) {
                                q->active[prio] = cl_prev;
                                if (cl == q->active[prio]) {
                                        q->active[prio] = NULL;
                                        return 0;
                                }
                        }

                        cl = cl_prev->next_alive;
                } else if (sched - cl->penalized > 0)
                        sched = cl->penalized;
        } while ((cl_prev = cl) != q->active[prio]);

        return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
        struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
                                                delay_timer);
        struct Qdisc *sch = q->watchdog.qdisc;
        psched_time_t now;
        psched_tdiff_t delay = 0;
        unsigned pmask;

        now = psched_get_time();

        pmask = q->pmask;
        q->pmask = 0;

        while (pmask) {
                int prio = ffz(~pmask);
                psched_tdiff_t tmp;

                pmask &= ~(1<<prio);

                tmp = cbq_undelay_prio(q, prio, now);
                if (tmp > 0) {
                        q->pmask |= 1<<prio;
                        if (tmp < delay || delay == 0)
                                delay = tmp;
                }
        }

        if (delay) {
                ktime_t time;

                time = ktime_set(0, 0);
                time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
                hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
        }

        sch->flags &= ~TCQ_F_THROTTLED;
        __netif_schedule(qdisc_root(sch));
        return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
        struct Qdisc *sch = child->__parent;
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = q->rx_class;

        q->rx_class = NULL;

        if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
                int ret;

                cbq_mark_toplevel(q, cl);

                q->rx_class = cl;
                cl->q->__parent = sch;

                ret = qdisc_enqueue(skb, cl->q);
                if (ret == NET_XMIT_SUCCESS) {
                        sch->q.qlen++;
                        sch->bstats.packets++;
                        sch->bstats.bytes += qdisc_pkt_len(skb);
                        if (!cl->next_alive)
                                cbq_activate_class(cl);
                        return 0;
                }
                if (net_xmit_drop_count(ret))
                        sch->qstats.drops++;
                return 0;
        }

        sch->qstats.drops++;
        return -1;
}
#endif

/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and it is not regulated. This is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                    struct cbq_class *borrowed)
{
        if (cl && q->toplevel >= borrowed->level) {
                if (cl->q->q.qlen > 1) {
                        do {
                                if (borrowed->undertime == PSCHED_PASTPERFECT) {
                                        q->toplevel = borrowed->level;
                                        return;
                                }
                        } while ((borrowed=borrowed->borrow) != NULL);
                }
#if 0
        /* It is not necessary now. Uncommenting it
           will save CPU cycles, but decrease fairness.
         */
                q->toplevel = TC_CBQ_MAXLEVEL;
#endif
        }
}

static void
cbq_update(struct cbq_sched_data *q)
{
        struct cbq_class *this = q->tx_class;
        struct cbq_class *cl = this;
        int len = q->tx_len;

        q->tx_class = NULL;

        for ( ; cl; cl = cl->share) {
                long avgidle = cl->avgidle;
                long idle;

                cl->bstats.packets++;
                cl->bstats.bytes += len;

                /*
                   (now - last) is the total time between packet right edges.
                   (last_pktlen/rate) is "virtual" busy time, so that

                   idle = (now - last) - last_pktlen/rate
                 */

                idle = q->now - cl->last;
                if ((unsigned long)idle > 128*1024*1024) {
                        avgidle = cl->maxidle;
                } else {
                        idle -= L2T(cl, len);

                        /* true_avgidle := (1-W)*true_avgidle + W*idle,
                           where W=2^{-ewma_log}. But cl->avgidle is scaled:
                           cl->avgidle == true_avgidle/W,
                           hence:
                         */
                        avgidle += idle - (avgidle>>cl->ewma_log);
                }

                if (avgidle <= 0) {
                        /* Overlimit or at-limit */

                        if (avgidle < cl->minidle)
                                avgidle = cl->minidle;

                        cl->avgidle = avgidle;

                        /* Calculate the expected time when this class
                           will be allowed to send.
                           That happens when:
                           (1-W)*true_avgidle + W*delay = 0, i.e.
                           idle = (1/W - 1)*(-true_avgidle)
                           or
                           idle = (1 - W)*(-cl->avgidle);
                         */
                        idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

                        /*
                           That is not all.
                           To maintain the rate allocated to the class,
                           we also add to undertime the virtual clock time
                           needed to complete the transmitted packet.
                           (len/phys_bandwidth has already elapsed by the
                           moment cbq_update runs.)
                         */

                        idle -= L2T(&q->link, len);
                        idle += L2T(cl, len);

                        cl->undertime = q->now + idle;
                } else {
                        /* Underlimit */

                        cl->undertime = PSCHED_PASTPERFECT;
                        if (avgidle > cl->maxidle)
                                cl->avgidle = cl->maxidle;
                        else
                                cl->avgidle = avgidle;
                }
                cl->last = q->now;
        }

        cbq_update_toplevel(q, this, q->tx_borrowed);
}
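
/* A small worked instance of the scaled EWMA above (assumed numbers):
   with ewma_log = 5 (W = 2^-5 = 1/32), cl->avgidle = 6400 (so
   true_avgidle = 6400*W = 200) and a new sample idle = 40,

       avgidle += 40 - (6400 >> 5)  ==>  avgidle = 6400 + 40 - 200 = 6240

   which corresponds to true_avgidle = (31/32)*200 + (1/32)*40 = 195,
   and indeed 6240*W = 195. Keeping avgidle scaled by 1/W lets the
   update run with one shift and no multiplication. */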

static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *this_cl = cl;

        if (cl->tparent == NULL)
                return cl;

        if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
                cl->delayed = 0;
                return cl;
        }

        do {
                /* This is a very suspicious place. As it stands, an
                   overlimit action is generated for non-bounded classes
                   only if the link is completely congested.
                   Though this agrees with the ancestor-only paradigm,
                   it looks very stupid. In particular, it means that
                   this chunk of code will either never be called or
                   will result in strong amplification of burstiness.
                   Dangerous, silly, and, however, no other solution exists.
                 */
                if ((cl = cl->borrow) == NULL) {
                        this_cl->qstats.overlimits++;
                        this_cl->overlimit(this_cl);
                        return NULL;
                }
                if (cl->level > q->toplevel)
                        return NULL;
        } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

        cl->delayed = 0;
        return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl_tail, *cl_prev, *cl;
        struct sk_buff *skb;
        int deficit;

        cl_tail = cl_prev = q->active[prio];
        cl = cl_prev->next_alive;

        do {
                deficit = 0;

                /* Start round */
                do {
                        struct cbq_class *borrow = cl;

                        if (cl->q->q.qlen &&
                            (borrow = cbq_under_limit(cl)) == NULL)
                                goto skip_class;

                        if (cl->deficit <= 0) {
                                /* Class exhausted its allotment for
                                   this round. Switch to the next one.
                                 */
                                deficit = 1;
                                cl->deficit += cl->quantum;
                                goto next_class;
                        }

                        skb = cl->q->dequeue(cl->q);

                        /* Class did not give us any skb :-(
                           This can occur even if cl->q->q.qlen != 0,
                           e.g. if cl->q is "tbf"
                         */
                        if (skb == NULL)
                                goto skip_class;

                        cl->deficit -= qdisc_pkt_len(skb);
                        q->tx_class = cl;
                        q->tx_borrowed = borrow;
                        if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
                                borrow->xstats.borrows++;
                                cl->xstats.borrows++;
#else
                                borrow->xstats.borrows += qdisc_pkt_len(skb);
                                cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
                        }
                        q->tx_len = qdisc_pkt_len(skb);

                        if (cl->deficit <= 0) {
                                q->active[prio] = cl;
                                cl = cl->next_alive;
                                cl->deficit += cl->quantum;
                        }
                        return skb;

skip_class:
                        if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
                                /* Class is empty or penalized.
                                   Unlink it from the active chain.
                                 */
                                cl_prev->next_alive = cl->next_alive;
                                cl->next_alive = NULL;

                                /* Did cl_tail point to it? */
                                if (cl == cl_tail) {
                                        /* Repair it! */
                                        cl_tail = cl_prev;

                                        /* Was it the last class in this band? */
                                        if (cl == cl_tail) {
                                                /* Kill the band! */
                                                q->active[prio] = NULL;
                                                q->activemask &= ~(1<<prio);
                                                if (cl->q->q.qlen)
                                                        cbq_activate_class(cl);
                                                return NULL;
                                        }

                                        q->active[prio] = cl_tail;
                                }
                                if (cl->q->q.qlen)
                                        cbq_activate_class(cl);

                                cl = cl_prev;
                        }

next_class:
                        cl_prev = cl;
                        cl = cl->next_alive;
                } while (cl_prev != cl_tail);
        } while (deficit);

        q->active[prio] = cl_prev;

        return NULL;
}
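
/* Deficit bookkeeping sketch (assumed numbers): suppose a class has
   quantum = 1000 and deficit = 200. It may send one packet of, say,
   1500 bytes; deficit then becomes 200 - 1500 = -1300, so the class
   yields and is recharged by +1000 each time it is revisited with a
   non-positive deficit (-300, then +700, when it may send again).
   Letting the deficit go negative is what allows quanta smaller than
   the MTU, as promised in the header comment. */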

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        unsigned activemask;

        activemask = q->activemask&0xFF;
        while (activemask) {
                int prio = ffz(~activemask);
                activemask &= ~(1<<prio);
                skb = cbq_dequeue_prio(sch, prio);
                if (skb)
                        return skb;
        }
        return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct cbq_sched_data *q = qdisc_priv(sch);
        psched_time_t now;
        psched_tdiff_t incr;

        now = psched_get_time();
        incr = now - q->now_rt;

        if (q->tx_class) {
                psched_tdiff_t incr2;
                /* Time integrator. We calculate EOS time
                   by adding the expected packet transmission time.
                   If real time is greater, we warp the artificial clock,
                   so that:

                   cbq_time = max(real_time, work);
                 */
                incr2 = L2T(&q->link, q->tx_len);
                q->now += incr2;
                cbq_update(q);
                if ((incr -= incr2) < 0)
                        incr = 0;
        }
        q->now += incr;
        q->now_rt = now;

        for (;;) {
                q->wd_expires = 0;

                skb = cbq_dequeue_1(sch);
                if (skb) {
                        sch->q.qlen--;
                        sch->flags &= ~TCQ_F_THROTTLED;
                        return skb;
                }

                /* All the classes are overlimit.

                   This is possible if:

                   1. The scheduler is empty.
                   2. The toplevel cutoff inhibited borrowing.
                   3. The root class is overlimit.

                   Reset the 2nd and 3rd conditions and retry.

                   Note that NS and cbq-2.0 are buggy: picking
                   an arbitrary class is appropriate for ancestor-only
                   sharing, but not for the toplevel algorithm.

                   Our version is better, but slower, because it requires
                   two passes; that is unavoidable with toplevel sharing.
                 */

                if (q->toplevel == TC_CBQ_MAXLEVEL &&
                    q->link.undertime == PSCHED_PASTPERFECT)
                        break;

                q->toplevel = TC_CBQ_MAXLEVEL;
                q->link.undertime = PSCHED_PASTPERFECT;
        }

        /* No packets in the scheduler, or nobody wants to give them to us :-(
           Sigh... Start the watchdog timer in the last case. */

        if (sch->q.qlen) {
                sch->qstats.overlimits++;
                if (q->wd_expires)
                        qdisc_watchdog_schedule(&q->watchdog,
                                                now + q->wd_expires);
        }
        return NULL;
}
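
/* Clock-integrator illustration (assumed numbers): let the previous
   packet have a q->tx_len such that L2T(&q->link, tx_len) = 120 us.
   If only 80 us of real time passed since the last dequeue, q->now
   is still advanced by the full 120 us and the leftover real-time
   increment is clamped to 0, so q->now runs ahead of the wall clock
   by the device's internal queueing; if 200 us passed, q->now
   advances by 120 + (200 - 120) = 200 us, i.e. it snaps back to real
   time. This realizes cbq_time = max(real_time, work) from the
   comment above. */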

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
        if (this == NULL)
                return;

        do {
                int level = 0;
                struct cbq_class *cl;

                if ((cl = this->children) != NULL) {
                        do {
                                if (cl->level > level)
                                        level = cl->level;
                        } while ((cl = cl->sibling) != this->children);
                }
                this->level = level+1;
        } while ((this = this->tparent) != NULL);
}
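
/* Example (assumed shape): with root -> A -> {A1, A2} and A1, A2
   being leaves created with level 0, cbq_adjust_levels(A) yields
   A->level = max(0, 0) + 1 = 1 and then root->level = 1 + 1 = 2,
   i.e. each node sits one level above its deepest child. */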

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
        struct cbq_class *cl;
        struct hlist_node *n;
        unsigned int h;

        if (q->quanta[prio] == 0)
                return;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                        /* BUGGGG... Beware! This expression suffers from
                           arithmetic overflow!
                         */
                        if (cl->priority == prio) {
                                cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
                                        q->quanta[prio];
                        }
                        if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
                                printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
                                cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
                        }
                }
        }
}
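
/* Why the overflow warning above is justified (assumed numbers):
   weight defaults to the class rate in bytes per second, so for a
   100 Mbit/s class weight ~= 12500000; with allot = 1514 even a
   single class in the band gives an intermediate product of about
   1.9e10, which no longer fits where long is 32 bits. On such
   configurations cl->quantum wraps and is then "repaired" to
   mtu/2 + 1 by the sanity check after the division. */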

static void cbq_sync_defmap(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *split = cl->split;
        unsigned h;
        int i;

        if (split == NULL)
                return;

        for (i=0; i<=TC_PRIO_MAX; i++) {
                if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
                        split->defaults[i] = NULL;
        }

        for (i=0; i<=TC_PRIO_MAX; i++) {
                int level = split->level;

                if (split->defaults[i])
                        continue;

                for (h = 0; h < q->clhash.hashsize; h++) {
                        struct hlist_node *n;
                        struct cbq_class *c;

                        hlist_for_each_entry(c, n, &q->clhash.hash[h],
                                             common.hnode) {
                                if (c->split == split && c->level < level &&
                                    c->defmap&(1<<i)) {
                                        split->defaults[i] = c;
                                        level = c->level;
                                }
                        }
                }
        }
}
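
/* Resolution example (assumed classes): if C1 (level 0) and C2
   (level 1) both attach to the same split node and both set bit i in
   their defmap, the second pass above ends with defaults[i] = C1,
   because each match lowers the accepted level bound; the
   lowest-level (most specific) class wins the priority slot. */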

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
        struct cbq_class *split = NULL;

        if (splitid == 0) {
                if ((split = cl->split) == NULL)
                        return;
                splitid = split->common.classid;
        }

        if (split == NULL || split->common.classid != splitid) {
                for (split = cl->tparent; split; split = split->tparent)
                        if (split->common.classid == splitid)
                                break;
        }

        if (split == NULL)
                return;

        if (cl->split != split) {
                cl->defmap = 0;
                cbq_sync_defmap(cl);
                cl->split = split;
                cl->defmap = def&mask;
        } else
                cl->defmap = (cl->defmap&~mask)|(def&mask);

        cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
        struct cbq_class *cl, **clp;
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);

        qdisc_class_hash_remove(&q->clhash, &this->common);

        if (this->tparent) {
                clp=&this->sibling;
                cl = *clp;
                do {
                        if (cl == this) {
                                *clp = cl->sibling;
                                break;
                        }
                        clp = &cl->sibling;
                } while ((cl = *clp) != this->sibling);

                if (this->tparent->children == this) {
                        this->tparent->children = this->sibling;
                        if (this->sibling == this)
                                this->tparent->children = NULL;
                }
        } else {
                WARN_ON(this->sibling != this);
        }
}

static void cbq_link_class(struct cbq_class *this)
{
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
        struct cbq_class *parent = this->tparent;

        this->sibling = this;
        qdisc_class_hash_insert(&q->clhash, &this->common);

        if (parent == NULL)
                return;

        if (parent->children == NULL) {
                parent->children = this;
        } else {
                this->sibling = parent->children->sibling;
                parent->children->sibling = this;
        }
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl, *cl_head;
        int prio;
        unsigned int len;

        for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
                if ((cl_head = q->active[prio]) == NULL)
                        continue;

                cl = cl_head;
                do {
                        if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
                                sch->q.qlen--;
                                if (!cl->q->q.qlen)
                                        cbq_deactivate_class(cl);
                                return len;
                        }
                } while ((cl = cl->next_alive) != cl_head);
        }
        return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        struct hlist_node *n;
        int prio;
        unsigned h;

        q->activemask = 0;
        q->pmask = 0;
        q->tx_class = NULL;
        q->tx_borrowed = NULL;
        qdisc_watchdog_cancel(&q->watchdog);
        hrtimer_cancel(&q->delay_timer);
        q->toplevel = TC_CBQ_MAXLEVEL;
        q->now = psched_get_time();
        q->now_rt = q->now;

        for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
                q->active[prio] = NULL;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                        qdisc_reset(cl->q);

                        cl->next_alive = NULL;
                        cl->undertime = PSCHED_PASTPERFECT;
                        cl->avgidle = cl->maxidle;
                        cl->deficit = cl->quantum;
                        cl->cpriority = cl->priority;
                }
        }
        sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
        if (lss->change&TCF_CBQ_LSS_FLAGS) {
                cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
                cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
        }
        if (lss->change&TCF_CBQ_LSS_EWMA)
                cl->ewma_log = lss->ewma_log;
        if (lss->change&TCF_CBQ_LSS_AVPKT)
                cl->avpkt = lss->avpkt;
        if (lss->change&TCF_CBQ_LSS_MINIDLE)
                cl->minidle = -(long)lss->minidle;
        if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
                cl->maxidle = lss->maxidle;
                cl->avgidle = lss->maxidle;
        }
        if (lss->change&TCF_CBQ_LSS_OFFTIME)
                cl->offtime = lss->offtime;
        return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
        q->nclasses[cl->priority]--;
        q->quanta[cl->priority] -= cl->weight;
        cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
        q->nclasses[cl->priority]++;
        q->quanta[cl->priority] += cl->weight;
        cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

        if (wrr->allot)
                cl->allot = wrr->allot;
        if (wrr->weight)
                cl->weight = wrr->weight;
        if (wrr->priority) {
                cl->priority = wrr->priority-1;
                cl->cpriority = cl->priority;
                if (cl->priority >= cl->priority2)
                        cl->priority2 = TC_CBQ_MAXPRIO-1;
        }

        cbq_addprio(q, cl);
        return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
        switch (ovl->strategy) {
        case TC_CBQ_OVL_CLASSIC:
                cl->overlimit = cbq_ovl_classic;
                break;
        case TC_CBQ_OVL_DELAY:
                cl->overlimit = cbq_ovl_delay;
                break;
        case TC_CBQ_OVL_LOWPRIO:
                if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
                    ovl->priority2-1 <= cl->priority)
                        return -EINVAL;
                cl->priority2 = ovl->priority2-1;
                cl->overlimit = cbq_ovl_lowprio;
                break;
        case TC_CBQ_OVL_DROP:
                cl->overlimit = cbq_ovl_drop;
                break;
        case TC_CBQ_OVL_RCLASSIC:
                cl->overlimit = cbq_ovl_rclassic;
                break;
        default:
                return -EINVAL;
        }
        cl->penalty = ovl->penalty;
        return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
        cl->police = p->police;

        if (cl->q->handle) {
                if (p->police == TC_POLICE_RECLASSIFY)
                        cl->q->reshape_fail = cbq_reshape_fail;
                else
                        cl->q->reshape_fail = NULL;
        }
        return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
        cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
        return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
        [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
        [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
        [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
        [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
        [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
        [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CBQ_MAX + 1];
        struct tc_ratespec *r;
        int err;

        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
        if (err < 0)
                return err;

        if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
                return -EINVAL;

        r = nla_data(tb[TCA_CBQ_RATE]);

        if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
                return -EINVAL;

        err = qdisc_class_hash_init(&q->clhash);
        if (err < 0)
                goto put_rtab;

        q->link.refcnt = 1;
        q->link.sibling = &q->link;
        q->link.common.classid = sch->handle;
        q->link.qdisc = sch;
        if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
                                            &pfifo_qdisc_ops,
                                            sch->handle)))
                q->link.q = &noop_qdisc;

        q->link.priority = TC_CBQ_MAXPRIO-1;
        q->link.priority2 = TC_CBQ_MAXPRIO-1;
        q->link.cpriority = TC_CBQ_MAXPRIO-1;
        q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
        q->link.overlimit = cbq_ovl_classic;
        q->link.allot = psched_mtu(qdisc_dev(sch));
        q->link.quantum = q->link.allot;
        q->link.weight = q->link.R_tab->rate.rate;

        q->link.ewma_log = TC_CBQ_DEF_EWMA;
        q->link.avpkt = q->link.allot/2;
        q->link.minidle = -0x7FFFFFFF;

        qdisc_watchdog_init(&q->watchdog, sch);
        hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        q->delay_timer.function = cbq_undelay;
        q->toplevel = TC_CBQ_MAXLEVEL;
        q->now = psched_get_time();
        q->now_rt = q->now;

        cbq_link_class(&q->link);

        if (tb[TCA_CBQ_LSSOPT])
                cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

        cbq_addprio(q, &q->link);
        return 0;

put_rtab:
        qdisc_put_rtab(q->link.R_tab);
        return err;
}

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);

        NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_lssopt opt;

        opt.flags = 0;
        if (cl->borrow == NULL)
                opt.flags |= TCF_CBQ_LSS_BOUNDED;
        if (cl->share == NULL)
                opt.flags |= TCF_CBQ_LSS_ISOLATED;
        opt.ewma_log = cl->ewma_log;
        opt.level = cl->level;
        opt.avpkt = cl->avpkt;
        opt.maxidle = cl->maxidle;
        opt.minidle = (u32)(-cl->minidle);
        opt.offtime = cl->offtime;
        opt.change = ~0;
        NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_wrropt opt;

        opt.flags = 0;
        opt.allot = cl->allot;
        opt.priority = cl->priority+1;
        opt.cpriority = cl->cpriority+1;
        opt.weight = cl->weight;
        NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_ovl opt;

        opt.strategy = cl->ovl_strategy;
        opt.priority2 = cl->priority2+1;
        opt.pad = 0;
        opt.penalty = cl->penalty;
        NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_fopt opt;

        if (cl->split || cl->defmap) {
                opt.split = cl->split ? cl->split->common.classid : 0;
                opt.defmap = cl->defmap;
                opt.defchange = ~0;
                NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
        }
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_police opt;

        if (cl->police) {
                opt.police = cl->police;
                opt.__res1 = 0;
                opt.__res2 = 0;
                NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
        }
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
        if (cbq_dump_lss(skb, cl) < 0 ||
            cbq_dump_rate(skb, cl) < 0 ||
            cbq_dump_wrr(skb, cl) < 0 ||
            cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
            cbq_dump_police(skb, cl) < 0 ||
#endif
            cbq_dump_fopt(skb, cl) < 0)
                return -1;
        return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct nlattr *nest;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
        if (cbq_dump_attr(skb, &q->link) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest);
        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct cbq_sched_data *q = qdisc_priv(sch);

        q->link.xstats.avgidle = q->link.avgidle;
        return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
               struct sk_buff *skb, struct tcmsg *tcm)
{
        struct cbq_class *cl = (struct cbq_class*)arg;
        struct nlattr *nest;

        if (cl->tparent)
                tcm->tcm_parent = cl->tparent->common.classid;
        else
                tcm->tcm_parent = TC_H_ROOT;
        tcm->tcm_handle = cl->common.classid;
        tcm->tcm_info = cl->q->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
        if (cbq_dump_attr(skb, cl) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest);
        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
                     struct gnet_dump *d)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class*)arg;

        cl->qstats.qlen = cl->q->q.qlen;
        cl->xstats.avgidle = cl->avgidle;
        cl->xstats.undertime = 0;

        if (cl->undertime != PSCHED_PASTPERFECT)
                cl->xstats.undertime = cl->undertime - q->now;

        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
            gnet_stats_copy_queue(d, &cl->qstats) < 0)
                return -1;

        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        if (cl) {
                if (new == NULL) {
                        new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
                                                &pfifo_qdisc_ops,
                                                cl->common.classid);
                        if (new == NULL)
                                return -ENOBUFS;
                } else {
#ifdef CONFIG_NET_CLS_ACT
                        if (cl->police == TC_POLICE_RECLASSIFY)
                                new->reshape_fail = cbq_reshape_fail;
#endif
                }
                sch_tree_lock(sch);
                *old = xchg(&cl->q, new);
                qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
                qdisc_reset(*old);
                sch_tree_unlock(sch);

                return 0;
        }
        return -ENOENT;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        return cl ? cl->q : NULL;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class *)arg;

        if (cl->q->q.qlen == 0)
                cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = cbq_class_lookup(q, classid);

        if (cl) {
                cl->refcnt++;
                return (unsigned long)cl;
        }
        return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(sch);

        WARN_ON(cl->filters);

        tcf_destroy_chain(&cl->filter_list);
        qdisc_destroy(cl->q);
        qdisc_put_rtab(cl->R_tab);
        gen_kill_estimator(&cl->bstats, &cl->rate_est);
        if (cl != &q->link)
                kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct hlist_node *n, *next;
        struct cbq_class *cl;
        unsigned h;

#ifdef CONFIG_NET_CLS_ACT
        q->rx_class = NULL;
#endif
        /*
         * Filters must be destroyed first because we don't destroy the
         * classes from root to leaves, which means that filters can still
         * be bound to classes which have been destroyed already. --TGR '04
         */
        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
                        tcf_destroy_chain(&cl->filter_list);
        }
        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
                                          common.hnode)
                        cbq_destroy_class(sch, cl);
        }
        qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
                spinlock_t *root_lock = qdisc_root_lock(sch);
                struct cbq_sched_data *q = qdisc_priv(sch);

                spin_lock_bh(root_lock);
                if (q->rx_class == cl)
                        q->rx_class = NULL;
                spin_unlock_bh(root_lock);
#endif

                cbq_destroy_class(sch, cl);
        }
}

static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
                 unsigned long *arg)
{
        int err;
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class*)*arg;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_CBQ_MAX + 1];
        struct cbq_class *parent;
        struct qdisc_rate_table *rtab = NULL;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
        if (err < 0)
                return err;

        if (cl) {
                /* Check parent */
                if (parentid) {
                        if (cl->tparent &&
                            cl->tparent->common.classid != parentid)
                                return -EINVAL;
                        if (!cl->tparent && parentid != TC_H_ROOT)
                                return -EINVAL;
                }

                if (tb[TCA_CBQ_RATE]) {
                        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
                        if (rtab == NULL)
                                return -EINVAL;
                }

                /* Change class parameters */
                sch_tree_lock(sch);

                if (cl->next_alive != NULL)
                        cbq_deactivate_class(cl);

                if (rtab) {
                        rtab = xchg(&cl->R_tab, rtab);
                        qdisc_put_rtab(rtab);
                }

                if (tb[TCA_CBQ_LSSOPT])
                        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

                if (tb[TCA_CBQ_WRROPT]) {
                        cbq_rmprio(q, cl);
                        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
                }

                if (tb[TCA_CBQ_OVL_STRATEGY])
                        cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
                if (tb[TCA_CBQ_POLICE])
                        cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

                if (tb[TCA_CBQ_FOPT])
                        cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

                if (cl->q->q.qlen)
                        cbq_activate_class(cl);

                sch_tree_unlock(sch);

                if (tca[TCA_RATE])
                        gen_replace_estimator(&cl->bstats, &cl->rate_est,
                                              qdisc_root_sleeping_lock(sch),
                                              tca[TCA_RATE]);
                return 0;
        }

        if (parentid == TC_H_ROOT)
                return -EINVAL;

        if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
            tb[TCA_CBQ_LSSOPT] == NULL)
                return -EINVAL;

        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
        if (rtab == NULL)
                return -EINVAL;

        if (classid) {
                err = -EINVAL;
                if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
                        goto failure;
        } else {
                int i;
                classid = TC_H_MAKE(sch->handle,0x8000);

                for (i=0; i<0x8000; i++) {
                        if (++q->hgenerator >= 0x8000)
                                q->hgenerator = 1;
                        if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
                                break;
                }
                err = -ENOSR;
                if (i >= 0x8000)
                        goto failure;
                classid = classid|q->hgenerator;
        }

        parent = &q->link;
        if (parentid) {
                parent = cbq_class_lookup(q, parentid);
                err = -EINVAL;
                if (parent == NULL)
                        goto failure;
        }

        err = -ENOBUFS;
        cl = kzalloc(sizeof(*cl), GFP_KERNEL);
        if (cl == NULL)
                goto failure;
        cl->R_tab = rtab;
        rtab = NULL;
        cl->refcnt = 1;
        if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
                                        &pfifo_qdisc_ops, classid)))
                cl->q = &noop_qdisc;
        cl->common.classid = classid;
        cl->tparent = parent;
        cl->qdisc = sch;
        cl->allot = parent->allot;
        cl->quantum = cl->allot;
        cl->weight = cl->R_tab->rate.rate;

        sch_tree_lock(sch);
        cbq_link_class(cl);
        cl->borrow = cl->tparent;
        if (cl->tparent != &q->link)
                cl->share = cl->tparent;
        cbq_adjust_levels(parent);
        cl->minidle = -0x7FFFFFFF;
        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
        if (cl->ewma_log==0)
                cl->ewma_log = q->link.ewma_log;
        if (cl->maxidle==0)
                cl->maxidle = q->link.maxidle;
        if (cl->avpkt==0)
                cl->avpkt = q->link.avpkt;
        cl->overlimit = cbq_ovl_classic;
        if (tb[TCA_CBQ_OVL_STRATEGY])
                cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
        if (tb[TCA_CBQ_POLICE])
                cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
        if (tb[TCA_CBQ_FOPT])
                cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
        sch_tree_unlock(sch);

        qdisc_class_hash_grow(sch, &q->clhash);

        if (tca[TCA_RATE])
                gen_new_estimator(&cl->bstats, &cl->rate_est,
                                  qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);

        *arg = (unsigned long)cl;
        return 0;

failure:
        qdisc_put_rtab(rtab);
        return err;
}

static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class*)arg;
        unsigned int qlen;

        if (cl->filters || cl->children || cl == &q->link)
                return -EBUSY;

        sch_tree_lock(sch);

        qlen = cl->q->q.qlen;
        qdisc_reset(cl->q);
        qdisc_tree_decrease_qlen(cl->q, qlen);

        if (cl->next_alive)
                cbq_deactivate_class(cl);

        if (q->tx_borrowed == cl)
                q->tx_borrowed = q->tx_class;
        if (q->tx_class == cl) {
                q->tx_class = NULL;
                q->tx_borrowed = NULL;
        }
#ifdef CONFIG_NET_CLS_ACT
        if (q->rx_class == cl)
                q->rx_class = NULL;
#endif

        cbq_unlink_class(cl);
        cbq_adjust_levels(cl->tparent);
        cl->defmap = 0;
        cbq_sync_defmap(cl);

        cbq_rmprio(q, cl);
        sch_tree_unlock(sch);

        if (--cl->refcnt == 0)
                cbq_destroy_class(sch, cl);

        return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class *)arg;

        if (cl == NULL)
                cl = &q->link;

        return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
                                     u32 classid)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *p = (struct cbq_class*)parent;
        struct cbq_class *cl = cbq_class_lookup(q, classid);

        if (cl) {
                if (p && p->level <= cl->level)
                        return 0;
                cl->filters++;
                return (unsigned long)cl;
        }
        return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        struct hlist_node *n;
        unsigned h;

        if (arg->stop)
                return;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                        if (arg->count < arg->skip) {
                                arg->count++;
                                continue;
                        }
                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
                                arg->stop = 1;
                                return;
                        }
                        arg->count++;
                }
        }
}

static const struct Qdisc_class_ops cbq_class_ops = {
        .graft          =       cbq_graft,
        .leaf           =       cbq_leaf,
        .qlen_notify    =       cbq_qlen_notify,
        .get            =       cbq_get,
        .put            =       cbq_put,
        .change         =       cbq_change_class,
        .delete         =       cbq_delete,
        .walk           =       cbq_walk,
        .tcf_chain      =       cbq_find_tcf,
        .bind_tcf       =       cbq_bind_filter,
        .unbind_tcf     =       cbq_unbind_filter,
        .dump           =       cbq_dump_class,
        .dump_stats     =       cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
        .next           =       NULL,
        .cl_ops         =       &cbq_class_ops,
        .id             =       "cbq",
        .priv_size      =       sizeof(struct cbq_sched_data),
        .enqueue        =       cbq_enqueue,
        .dequeue        =       cbq_dequeue,
        .requeue        =       cbq_requeue,
        .drop           =       cbq_drop,
        .init           =       cbq_init,
        .reset          =       cbq_reset,
        .destroy        =       cbq_destroy,
        .change         =       NULL,
        .dump           =       cbq_dump,
        .dump_stats     =       cbq_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init cbq_module_init(void)
{
        return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
        unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");