/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *		HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *		found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *		helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *		code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *		created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *		spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *		fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
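
/* Example (illustrative, not part of the original source): a small
 * hierarchy as built by the tc(8) userspace tool might be
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 1mbit ceil 1mbit
 *	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 600kbit ceil 1mbit
 *	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 400kbit ceil 1mbit
 *
 * 1:10 and 1:20 are leaves (level 0) that may borrow from their interior
 * parent 1:1; unclassified traffic falls into the "default 20" leaf.
 */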

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

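/* Usage (illustrative): the parameter can be set at load time or, thanks to
 * the 0640 mode above, flipped at run time through the standard module
 * parameter sysfs file:
 *
 *	modprobe sch_htb htb_hysteresis=1
 *	echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */
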
/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

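/* The modes above form a ladder (see htb_class_mode() below): a class that
 * exhausts its own rate drops from HTB_CAN_SEND to HTB_MAY_BORROW, and once
 * its ceil tokens are gone too it drops to HTB_CANT_SEND; refilling tokens
 * move it back up the same way.
 */
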
/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	struct Qdisc_class_common common;
	/* general class parameters */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;			/* usage count of this class */

	/* topology */
	int level;			/* our level (see above) */
	unsigned int children;
	struct htb_class *parent;	/* parent class */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int prio;
			int aprio;
			int quantum;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects from
			   parent's feed then we lost ptr value and start from the
			   first child again. Here we store classid of the
			   last valid ptr (used when ptr is NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;			/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non work conserving .. */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;		/* token bucket depth/rate */
	psched_tdiff_t mbuffer;		/* max wait time */
	long tokens, ctokens;		/* current number of tokens */
	psched_time_t t_c;		/* checkpoint time */

	int prio;		/* backup of un.leaf.prio, kept for a possible
				   parent-to-leaf return */
	int quantum;		/* backup of un.leaf.quantum; eventually a full
				   replacement of the un.leaf originals should
				   be done */
};

static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
		       int size)
{
	/* length-to-time: rate table lookup of the transmission time for
	   "size" bytes */
	return qdisc_l2t(rate, size);
}

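/* Note (illustrative): qdisc_l2t() is in essence a table lookup - userspace
 * tc precomputes a rate table mapping a packet-size bucket to the time (in
 * PSCHED ticks) the packet needs on the wire at the configured rate, so
 * L2T(cl, cl->rate, 1500) answers "how long does a 1500-byte packet occupy
 * the link at this class's rate".
 */
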
struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	/* whether we hit non-work-conserving class during this dequeue; we
	   use this to disable mindelay complaint in dequeue */
	int nwc_hit;

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non-shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

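/* Note: a classid like 1:10 is encoded with the major number in the upper
 * 16 bits of the u32 handle and the minor in the lower 16, i.e.
 * 1:10 == 0x0001000a; TC_H_MAJ()/TC_H_MIN() split the two halves while
 * htb_find() hashes on the whole handle.
 */
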
/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If that is still
 * unsuccessful, finish and return the direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	   note that nfmark can be used too by attaching filter fw with no
	   rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}

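/* Example (illustrative): on a qdisc with handle 1: and "default 20", an skb
 * with skb->priority == 1:0 goes to the direct queue, skb->priority == 1:10
 * selects that leaf directly, a packet matched by "tc filter ... flowid 1:10"
 * reaches 1:10 via tc_classify(), and everything else lands in the 1:20
 * default leaf.
 */
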
/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);	/* index of lowest set bit in mask */
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for the priorities it participates in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use so
				   reset the bit in mask as parent is
				   already ok */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing child which is pointed to
				   from parent feed - forget the pointer but
				   remember classid */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline long htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}

static inline long htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0; there can rather be a hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

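/* Worked example of the above (hysteresis off, so both watermarks are 0):
 *	ctokens + diff <  0  ->  HTB_CANT_SEND  (ceil exceeded)
 *	tokens  + diff >= 0  ->  HTB_CAN_SEND   (within own rate)
 *	otherwise            ->  HTB_MAY_BORROW (ceil ok, own rate spent)
 * With hysteresis on, the watermark for the current mode moves down to
 * -cbuffer/-buffer, so a class keeps its mode until tokens drift a full
 * burst below zero - fewer transitions at the cost of accuracy.
 */
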
/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->un.leaf.aprio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else {
		cl->bstats.packets +=
			skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
		cl->bstats.bytes += qdisc_pkt_len(skb);
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	return NET_XMIT_SUCCESS;
}

/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);
	struct sk_buff *tskb;

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_head(&q->direct_queue, skb);
		} else {
			__skb_queue_head(&q->direct_queue, skb);
			tskb = __skb_dequeue_tail(&q->direct_queue);
			kfree_skb(tskb);
			sch->qstats.drops++;
			return NET_XMIT_CN;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
		   NET_XMIT_SUCCESS) {
		sch->qstats.drops++;
		cl->qstats.drops++;
		return NET_XMIT_DROP;
	} else
		htb_activate(q, cl);

	sch->q.qlen++;
	sch->qstats.requeues++;
	return NET_XMIT_SUCCESS;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock here than in the
 * event queue. In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	long toks, diff;
	enum htb_cmode old_mode;

#define HTB_ACCNT(T, B, R) toks = diff + cl->T; \
	if (toks > cl->B) toks = cl->B; \
	toks -= L2T(cl, cl->R, bytes); \
	if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; \
	cl->T = toks

	while (cl) {
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			HTB_ACCNT(tokens, buffer, rate);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		HTB_ACCNT(ctokens, cbuffer, ceil);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets += skb_is_gso(skb) ?
					skb_shinfo(skb)->gso_segs : 1;
		}
		cl = cl->parent;
	}
}

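/* For reference, HTB_ACCNT(tokens, buffer, rate) above expands to:
 *
 *	toks = diff + cl->tokens;		// refill by elapsed time
 *	if (toks > cl->buffer)
 *		toks = cl->buffer;		// cap at one full burst
 *	toks -= L2T(cl, cl->rate, bytes);	// charge the transmit time
 *	if (toks <= -cl->mbuffer)
 *		toks = 1 - cl->mbuffer;		// bound the token debt
 *	cl->tokens = toks;
 *
 * and the same pattern runs for (ctokens, cbuffer, ceil).
 */
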
/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	   1 to simplify things when jiffy is going to be incremented
	   too soon */
	unsigned long stop_at = jiffies + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	/* too much load - let's continue on next jiffie */
	return q->now + PSCHED_TICKS_PER_SEC / HZ;
}

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
   if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);
		if (id == cl->common.classid)
			return n;

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else {
			r = n;
			n = n->rb_left;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointers point to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	WARN_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			   the original or next ptr */
			*sp->pptr =
				htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so remove this hint as it
				   can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				WARN_ON(!*sp->pptr);
				if (!*sp->pptr)
					return NULL;
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
   you are sure that there is an active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		WARN_ON(!cl);
		if (!cl)
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		   qdisc drops packets in enqueue routine or if someone used
		   graft operation on the leaf since last dequeue;
		   simply deactivate and skip such class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;
		if (!cl->warned) {
			printk(KERN_WARNING
			       "htb: class %X isn't work conserving ?!\n",
			       cl->common.classid);
			cl->warned = 1;
		}
		q->nwc_hit++;
		htb_next_rb_node((level ? cl->parent->un.inner.ptr :
				  q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
					  q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

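/* Example of the deficit round-robin accounting above (illustrative): a leaf
 * with quantum 1500 keeps being selected until dequeued bytes push its
 * deficit below zero; only then does htb_next_rb_node() advance the
 * round-robin pointer, so each class gets roughly quantum bytes per round
 * independent of packet sizes.
 */
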
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = psched_get_time();

	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
	q->nwc_hit = 0;
	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level);
			if (!event)
				event = q->now + PSCHED_TICKS_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (event && next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	sch->qstats.overlimits++;
	qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->un.leaf.quantum;
	opt.prio = cl->un.leaf.prio;
	opt.level = cl->level;
	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL &&
		    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					     &pfifo_qdisc_ops,
					     cl->common.classid))
		    == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->un.leaf.quantum = parent->quantum;
	parent->un.leaf.prio = parent->prio;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct htb_class *cl;
	unsigned int i;

	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after the htb_destroy_class call below
	   and surprisingly it worked in 2.4. But it must precede it
	   because filters need their target class alive to be able to call
	   unbind_filter on it (without an Oops). */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references? does
	// the tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[TCA_HTB_RTAB + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr nla;
			struct gnet_estimator opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  qdisc_root_lock(sch),
				  tca[TCA_RATE] ? : &est.nla);
		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		   so it can't be used inside of sch_tree_lock
		   -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
		cl->t_c = psched_get_time();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
	} else {
		if (tca[TCA_RATE])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      qdisc_root_lock(sch),
					      tca[TCA_RATE]);
		sch_tree_lock(sch);
	}

	/* there used to be a nasty bug here, we have to check that node
	   is really a leaf before changing cl->un.leaf ! */
	if (!cl->level) {
		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->common.classid);
			cl->un.leaf.quantum = 1000;
		}
		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->common.classid);
			cl->un.leaf.quantum = 200000;
		}
		if (hopt->quantum)
			cl->un.leaf.quantum = hopt->quantum;
		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;

		/* backup for htb_parent_to_leaf */
		cl->quantum = cl->un.leaf.quantum;
		cl->prio = cl->un.leaf.prio;
	}

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;
	if (cl->rate)
		qdisc_put_rtab(cl->rate);
	cl->rate = rtab;
	if (cl->ceil)
		qdisc_put_rtab(cl->ceil);
	cl->ceil = ctab;
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ctab)
		qdisc_put_rtab(ctab);
	return err;
}

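/* Example of the quantum computation above (illustrative, assuming tc's
 * default r2q of 10): a class with rate 1mbit (125000 bytes/s) gets
 * quantum 12500. Rates under ~80kbit compute a quantum below 1000 and rates
 * over ~16mbit one above 200000, triggering the clamps and the
 * "Consider r2q change" warnings.
 */
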
static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	   The line above used to be there to prevent attaching filters to
	   leaves. But at least the tc_index filter uses this just to get a
	   class for other reasons, so we have to allow it.
	   ----
	   19.6.2002 As Werner explained it is ok - bind filter is just
	   another way to "lock" the class - unlike "get" this lock can
	   be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		= htb_graft,
	.leaf		= htb_leaf,
	.qlen_notify	= htb_qlen_notify,
	.get		= htb_get,
	.put		= htb_put,
	.change		= htb_change_class,
	.delete		= htb_delete,
	.walk		= htb_walk,
	.tcf_chain	= htb_find_tcf,
	.bind_tcf	= htb_bind_filter,
	.unbind_tcf	= htb_unbind_filter,
	.dump		= htb_dump_class,
	.dump_stats	= htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &htb_class_ops,
	.id		= "htb",
	.priv_size	= sizeof(struct htb_sched),
	.enqueue	= htb_enqueue,
	.dequeue	= htb_dequeue,
	.requeue	= htb_requeue,
	.drop		= htb_drop,
	.init		= htb_init,
	.reset		= htb_reset,
	.destroy	= htb_destroy,
	.change		= NULL /* htb_change */,
	.dump		= htb_dump,
	.owner		= THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}

static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");