/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows a priority to be assigned to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. Leaves ALWAYS have level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
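
/* Example of the level rule above: with TC_HTB_MAXDEPTH == 8 (its value
   in pkt_sched.h), a root class sits at level 7 and an interior child of
   the root at level 6, while any class with an attached leaf qdisc is a
   leaf at level 0 regardless of its depth in the tree. */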

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

/* used internally to keep status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	struct Qdisc_class_common common;
	/* general class parameters */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

	/* topology */
	int level;		/* our level (see above) */
	unsigned int children;
	struct htb_class *parent;	/* parent class */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int prio;
			int quantum;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects
			   from parent's feed then we lose the ptr value and
			   start from the first child again. Here we store the
			   classid of the last valid ptr (used when ptr is
			   NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non-work-conserving class */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */

	int prio;		/* backup of un.leaf.prio, so a parent can be
				   turned back into a leaf */
	int quantum;		/* backup of un.leaf.quantum; a full replacement
				   of the un.leaf originals should eventually
				   be done (see htb_parent_to_leaf()) */
};
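
/* Note on prio_activity above: it is a bitmask with bit p set when the
   class is active at priority p. A leaf sets exactly one bit,
   1 << un.leaf.prio (see htb_activate()), while interior nodes
   accumulate bits from their children via htb_activate_prios(). */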

/* length-to-time: number of psched ticks needed to transmit "size"
   bytes at the rate given by the rate table */
static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
		       int size)
{
	return qdisc_l2t(rate, size);
}

struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly through */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

/**
 * htb_classify - classify a packet into a class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly through. In all other cases a leaf class is
 * returned. We allow direct class selection by classid in skb->priority.
 * Then we examine filters in the qdisc and in inner nodes (if a higher
 * filter points to the inner node). If we end up with classid MAJOR:0
 * we enqueue the skb into the special internal fifo (direct). These
 * packets then go directly through. If we still have no valid leaf we
 * try to use the MAJOR:default leaf. If even that is unsuccessful we
 * finish and return the direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	   note that nfmark can be used too by attaching filter fw with no
	   rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}
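
/* The ffz(~mask) idiom above (and in the other mask walks below) yields
   the index of the lowest set bit in mask, i.e. the next priority to
   handle; clearing that bit each iteration visits every marked prio. */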

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use for this
				   prio, so clear the bit in mask - the parent
				   is already OK */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}
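
/* In other words: the loop above climbs the tree only through ancestors
   in HTB_MAY_BORROW mode, attaching the child to each one's per-prio
   feed tree; the first ancestor reached in HTB_CAN_SEND mode is instead
   attached to the global row for its level, which is where
   htb_dequeue_tree() starts its search. */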

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent the old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing a child which is pointed to
				   from the parent feed - forget the pointer
				   but remember the classid */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline long htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}

static inline long htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
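
/* Worked example with hysteresis off (lowater == hiwater == 0): if
   cl->ctokens + *diff == -5 the class is over its ceiling, so it gets
   HTB_CANT_SEND and *diff becomes 5, the shortfall later used to
   schedule its wakeup event. If instead the ceil bucket is fine and
   cl->tokens + *diff >= 0, the class may send at its own rate
   (HTB_CAN_SEND); otherwise it is out of rate tokens but under the
   ceiling and may only borrow (HTB_MAY_BORROW). */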

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->un.leaf.prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->un.leaf.prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
	} else {
		cl->bstats.packets +=
			skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
		cl->bstats.bytes += qdisc_pkt_len(skb);
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	return NET_XMIT_SUCCESS;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In that case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	long toks, diff;
	enum htb_cmode old_mode;

#define HTB_ACCNT(T, B, R) toks = diff + cl->T; \
	if (toks > cl->B) toks = cl->B; \
	toks -= L2T(cl, cl->R, bytes); \
	if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; \
	cl->T = toks

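	/* HTB_ACCNT in words: refill bucket cl->T by the elapsed time "diff",
	   clamp it at the bucket depth cl->B, then charge the transmit time
	   of "bytes" at rate cl->R, flooring the result just above
	   -cl->mbuffer so a long burst cannot drive the deficit
	   arbitrarily deep. */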
	while (cl) {
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			HTB_ACCNT(tokens, buffer, rate);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		HTB_ACCNT(ctokens, cbuffer, ceil);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets += skb_is_gso(skb) ?
					skb_shinfo(skb)->gso_segs : 1;
		}
		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	   1 to simplify things when jiffy is going to be incremented
	   too soon */
	unsigned long stop_at = jiffies + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	/* too much load - let's continue on the next jiffy */
	return q->now + PSCHED_TICKS_PER_SEC / HZ;
}

/* Returns class->node+prio from id-tree where the class's id is >= id. NULL
   if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);
		if (id == cl->common.classid)
			return n;

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else {
			r = n;
			n = n->rb_left;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf where the current feed pointers point to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	WARN_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			   the original or next ptr */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so remove this hint as it
				   can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				WARN_ON(!*sp->pptr);
				if (!*sp->pptr)
					return NULL;
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}
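
/* The explicit stk[] stack above implements an iterative descent through
   the nested per-prio feed trees, one frame per tree level, so the walk
   needs no recursion and can resume from the (pptr, pid) cursors that
   record the DRR position at each level. */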

/* dequeues packet at given priority and level; call only if
   you are sure that there is an active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		WARN_ON(!cl);
		if (!cl)
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		   qdisc drops packets in enqueue routine or if someone used
		   graft operation on the leaf since last dequeue;
		   simply deactivate and skip such class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;
		if (!cl->warned) {
			printk(KERN_WARNING
			       "htb: class %X isn't work conserving ?!\n",
			       cl->common.classid);
			cl->warned = 1;
		}

		htb_next_rb_node((level ? cl->parent->un.inner.ptr :
				  q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
					  q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this arrangement
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = psched_get_time();

	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level);
			if (!event)
				event = q->now + PSCHED_TICKS_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (event && next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	sch->qstats.overlimits++;
	qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
	return skb;
}
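
/* The scan order in htb_dequeue() defines the service discipline: level 0
   (leaves sending within their own rate) is tried before higher levels
   (classes that must borrow), and within a level ffz() picks the
   numerically lowest active prio first, so lower prio values win. */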

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}
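
/* Example userspace setup exercising this init path (illustrative tc
   commands, assuming device eth0):

     tc qdisc add dev eth0 root handle 1: htb default 12
     tc class add dev eth0 parent 1:  classid 1:1  htb rate 1mbit ceil 1mbit
     tc class add dev eth0 parent 1:1 classid 1:10 htb rate 600kbit ceil 1mbit
     tc class add dev eth0 parent 1:1 classid 1:12 htb rate 400kbit ceil 1mbit

   Unclassified traffic then falls to class 1:12 via q->defcls, and both
   leaves may borrow from 1:1 up to their ceiling. */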

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->un.leaf.quantum;
	opt.prio = cl->un.leaf.prio;
	opt.level = cl->level;
	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL &&
		    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					     &pfifo_qdisc_ops,
					     cl->common.classid))
		    == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		*old = cl->un.leaf.q;
		cl->un.leaf.q = new;
		if (*old != NULL) {
			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->un.leaf.quantum = parent->quantum;
	parent->un.leaf.prio = parent->prio;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct htb_class *cl;
	unsigned int i;

	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after the htb_destroy_class call below
	   and surprisingly it worked in 2.4. But it must precede it
	   because filters need their target class alive to be able to
	   call unbind_filter on it (without an Oops). */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references?
	// does the tc subsys guarantee us that in htb_destroy it holds
	// no class refs, so that we can remove children safely there?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[TCA_HTB_RTAB + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr nla;
			struct gnet_estimator opt;
		} est = {
			.nla = {
				.nla_len = nla_attr_size(sizeof(est.opt)),
				.nla_type = TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval = 2,
				.ewma_log = 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE] ? : &est.nla);
		if (err) {
			kfree(cl);
			goto failure;
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL),
		   so it can't be created inside sch_tree_lock
		   -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
		cl->t_c = psched_get_time();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		sch_tree_lock(sch);
	}

	/* there used to be a nasty bug here: we have to check that the node
	   is really a leaf before changing cl->un.leaf! */
	if (!cl->level) {
		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->common.classid);
			cl->un.leaf.quantum = 1000;
		}
		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->common.classid);
			cl->un.leaf.quantum = 200000;
		}
		if (hopt->quantum)
			cl->un.leaf.quantum = hopt->quantum;
		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;

		/* backup for htb_parent_to_leaf */
		cl->quantum = cl->un.leaf.quantum;
		cl->prio = cl->un.leaf.prio;
	}

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;
	if (cl->rate)
		qdisc_put_rtab(cl->rate);
	cl->rate = rtab;
	if (cl->ceil)
		qdisc_put_rtab(cl->ceil);
	cl->ceil = ctab;
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ctab)
		qdisc_put_rtab(ctab);
	return err;
}

static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	   The line above used to be there to prevent attaching filters to
	   leaves. But at least the tc_index filter uses this just to get a
	   class for other reasons, so we have to allow for it.
	   ----
	   19.6.2002 As Werner explained it is ok - bind filter is just
	   another way to "lock" the class - unlike "get" this lock can
	   be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		= htb_graft,
	.leaf		= htb_leaf,
	.qlen_notify	= htb_qlen_notify,
	.get		= htb_get,
	.put		= htb_put,
	.change		= htb_change_class,
	.delete		= htb_delete,
	.walk		= htb_walk,
	.tcf_chain	= htb_find_tcf,
	.bind_tcf	= htb_bind_filter,
	.unbind_tcf	= htb_unbind_filter,
	.dump		= htb_dump_class,
	.dump_stats	= htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &htb_class_ops,
	.id		= "htb",
	.priv_size	= sizeof(struct htb_sched),
	.enqueue	= htb_enqueue,
	.dequeue	= htb_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= htb_drop,
	.init		= htb_init,
	.reset		= htb_reset,
	.destroy	= htb_destroy,
	.change		= NULL /* htb_change */,
	.dump		= htb_dump,
	.owner		= THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}

static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");
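
/* The htb_hysteresis knob declared near the top of this file can be set
   at load time or (mode 0640) changed at runtime, e.g.:

     modprobe sch_htb htb_hysteresis=1
     echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis

   (illustrative commands, assuming the module is built as sch_htb) */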