[NET_SCHED]: sch_htb: use hrtimer based watchdog
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *		HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *		found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *		helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *		code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *		created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *		spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *		fixed requeue routine
 *		and many others. thanks.
 *
 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
 */
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>
/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. Leaves always have level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
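
/* A rough example of the level assignment above (classids are made up and
   TC_HTB_MAXDEPTH is assumed to be 8): for a hierarchy 1:1 -> 1:10 ->
   {1:101, 1:102}, the leaves 1:101 and 1:102 sit at level 0, the interior
   class 1:10 at level 6 and the root 1:1 at level 7 (TC_HTB_MAXDEPTH-1);
   borrowing always runs from a class to its parent one level up. */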
#define HTB_HSIZE 16		/* classid hash size */
#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
#define HTB_RATECM 1		/* whether to use rate computer */
#define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif
/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};
/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	/* general class parameters */
	u32 classid;
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

#ifdef HTB_RATECM
	/* rate measurement counters */
	unsigned long rate_bytes, sum_bytes;
	unsigned long rate_packets, sum_packets;
#endif

	/* topology */
	int level;		/* our level (see above) */
	struct htb_class *parent;	/* parent class */
	struct hlist_node hlist;	/* classid hash list item */
	struct list_head sibling;	/* sibling list item */
	struct list_head children;	/* children list */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int prio;
			int aprio;
			int quantum;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects from
			   parent's feed then we lose the ptr value and start from the
			   first child again. Here we store classid of the
			   last valid ptr (used when ptr is NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non work conserving .. */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */

	int prio;		/* For parent to leaf return possible here */
	int quantum;		/* we do backup. Finally full replacement */
				/* of un.leaf originals should be done. */
};
/* TODO: maybe compute rate when size is too large .. or drop ? */
static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
		       int size)
{
	int slot = size >> rate->rate.cell_log;
	if (slot > 255) {
		cl->xstats.giants++;
		slot = 255;
	}
	return rate->data[slot];
}
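/* Rough arithmetic for the lookup above (cell_log == 3 is an assumed,
   typical value for a 1500 byte MTU): a 1500 byte packet maps to slot
   1500 >> 3 == 187, and rate->data[187] holds the precomputed time needed
   to send that many bytes at the class rate; sizes past slot 255 are
   clamped and counted in xstats.giants. */
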
struct htb_sched {
	struct list_head root;	/* root classes list */
	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	/* whether we hit non-work conserving class during this dequeue; we use */
	int nwc_hit;		/* this to disable mindelay complaint in dequeue */

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;
#ifdef HTB_RATECM
	struct timer_list rttim;	/* rate computer timer */
	int recmp_bucket;	/* which hash bucket to recompute next */
#endif

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};
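
/* The qdisc_watchdog member wraps an hrtimer: htb_dequeue() arms it via
   qdisc_watchdog_schedule() when every class is throttled, so the qdisc is
   woken at the precise psched time of the next event rather than by the
   jiffies-resolution delay timer that earlier HTB versions relied on. */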
207
208/* compute hash of size HTB_HSIZE for given handle */
87990467 209static inline int htb_hash(u32 h)
1da177e4
LT
210{
211#if HTB_HSIZE != 16
87990467 212#error "Declare new hash for your HTB_HSIZE"
1da177e4 213#endif
87990467
SH
214 h ^= h >> 8; /* stolen from cbq_hash */
215 h ^= h >> 4;
216 return h & 0xf;
1da177e4
LT
217}
218
219/* find class in global hash table using given handle */
87990467 220static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
1da177e4
LT
221{
222 struct htb_sched *q = qdisc_priv(sch);
0cef296d
SH
223 struct hlist_node *p;
224 struct htb_class *cl;
225
87990467 226 if (TC_H_MAJ(handle) != sch->handle)
1da177e4 227 return NULL;
87990467 228
0cef296d 229 hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
1da177e4
LT
230 if (cl->classid == handle)
231 return cl;
232 }
233 return NULL;
234}
235
/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly through. In all other cases a leaf class is
 * returned. We allow direct class selection by classid in priority. Then we
 * examine filters in qdisc and in inner nodes (if a higher filter points to
 * the inner node). If we end up with classid MAJOR:0 we enqueue the skb into
 * a special internal fifo (direct). These packets then go directly through.
 * If we still have no valid leaf we try to use the MAJOR:default leaf. If
 * that is still unsuccessful we finish and return the direct queue.
 */
#define HTB_DIRECT (struct htb_class*)-1
static inline u32 htb_classid(struct htb_class *cl)
{
	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
}

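/* Rough usage sketch for the classifier below (device and classids are
 * made up):
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1: classid 1:20 htb rate 1mbit
 * A packet whose skb->priority is set to 1:20, or which a filter maps to
 * 1:20, goes to that leaf; an unclassified packet falls back to the
 * "default 20" class, and only if that is missing or not a leaf does it
 * end up in the direct queue.
 */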
87990467
SH
254static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
255 int *qerr)
1da177e4
LT
256{
257 struct htb_sched *q = qdisc_priv(sch);
258 struct htb_class *cl;
259 struct tcf_result res;
260 struct tcf_proto *tcf;
261 int result;
262
263 /* allow to select class by setting skb->priority to valid classid;
264 note that nfmark can be used too by attaching filter fw with no
265 rules in it */
266 if (skb->priority == sch->handle)
87990467
SH
267 return HTB_DIRECT; /* X:0 (direct flow) selected */
268 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
1da177e4
LT
269 return cl;
270
29f1df6c 271 *qerr = NET_XMIT_BYPASS;
1da177e4
LT
272 tcf = q->filter_list;
273 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
274#ifdef CONFIG_NET_CLS_ACT
275 switch (result) {
276 case TC_ACT_QUEUED:
87990467 277 case TC_ACT_STOLEN:
1da177e4
LT
278 *qerr = NET_XMIT_SUCCESS;
279 case TC_ACT_SHOT:
280 return NULL;
281 }
282#elif defined(CONFIG_NET_CLS_POLICE)
283 if (result == TC_POLICE_SHOT)
284 return HTB_DIRECT;
285#endif
87990467 286 if ((cl = (void *)res.class) == NULL) {
1da177e4 287 if (res.classid == sch->handle)
87990467
SH
288 return HTB_DIRECT; /* X:0 (direct flow) */
289 if ((cl = htb_find(res.classid, sch)) == NULL)
290 break; /* filter selected invalid classid */
1da177e4
LT
291 }
292 if (!cl->level)
87990467 293 return cl; /* we hit leaf; return it */
1da177e4
LT
294
295 /* we have got inner class; apply inner filter chain */
296 tcf = cl->filter_list;
297 }
298 /* classification failed; try to use default class */
87990467 299 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1da177e4 300 if (!cl || cl->level)
87990467 301 return HTB_DIRECT; /* bad default .. this is safe bet */
1da177e4
LT
302 return cl;
303}
304
1da177e4
LT
305/**
306 * htb_add_to_id_tree - adds class to the round robin list
307 *
308 * Routine adds class to the list (actually tree) sorted by classid.
309 * Make sure that class is not already on such list for given prio.
310 */
87990467
SH
311static void htb_add_to_id_tree(struct rb_root *root,
312 struct htb_class *cl, int prio)
1da177e4
LT
313{
314 struct rb_node **p = &root->rb_node, *parent = NULL;
3bf72957 315
1da177e4 316 while (*p) {
87990467
SH
317 struct htb_class *c;
318 parent = *p;
1da177e4 319 c = rb_entry(parent, struct htb_class, node[prio]);
3bf72957 320
1da177e4
LT
321 if (cl->classid > c->classid)
322 p = &parent->rb_right;
87990467 323 else
1da177e4
LT
324 p = &parent->rb_left;
325 }
326 rb_link_node(&cl->node[prio], parent, p);
327 rb_insert_color(&cl->node[prio], root);
328}
329
330/**
331 * htb_add_to_wait_tree - adds class to the event queue with delay
332 *
333 * The class is added to priority event queue to indicate that class will
334 * change its mode in cl->pq_key microseconds. Make sure that class is not
335 * already in the queue.
336 */
87990467
SH
337static void htb_add_to_wait_tree(struct htb_sched *q,
338 struct htb_class *cl, long delay)
1da177e4
LT
339{
340 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
3bf72957 341
fb983d45
PM
342 cl->pq_key = q->now + delay;
343 if (cl->pq_key == q->now)
1da177e4
LT
344 cl->pq_key++;
345
346 /* update the nearest event cache */
fb983d45 347 if (q->near_ev_cache[cl->level] > cl->pq_key)
1da177e4 348 q->near_ev_cache[cl->level] = cl->pq_key;
87990467 349
1da177e4 350 while (*p) {
87990467
SH
351 struct htb_class *c;
352 parent = *p;
1da177e4 353 c = rb_entry(parent, struct htb_class, pq_node);
fb983d45 354 if (cl->pq_key >= c->pq_key)
1da177e4 355 p = &parent->rb_right;
87990467 356 else
1da177e4
LT
357 p = &parent->rb_left;
358 }
359 rb_link_node(&cl->pq_node, parent, p);
360 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
361}
362
363/**
364 * htb_next_rb_node - finds next node in binary tree
365 *
366 * When we are past last key we return NULL.
367 * Average complexity is 2 steps per call.
368 */
3696f625 369static inline void htb_next_rb_node(struct rb_node **n)
1da177e4
LT
370{
371 *n = rb_next(*n);
372}
373
374/**
375 * htb_add_class_to_row - add class to its row
376 *
377 * The class is added to row at priorities marked in mask.
378 * It does nothing if mask == 0.
379 */
87990467
SH
380static inline void htb_add_class_to_row(struct htb_sched *q,
381 struct htb_class *cl, int mask)
1da177e4 382{
1da177e4
LT
383 q->row_mask[cl->level] |= mask;
384 while (mask) {
385 int prio = ffz(~mask);
386 mask &= ~(1 << prio);
87990467 387 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
1da177e4
LT
388 }
389}
390
3696f625
SH
391/* If this triggers, it is a bug in this code, but it need not be fatal */
392static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
393{
81771b3b 394 if (RB_EMPTY_NODE(rb)) {
3696f625
SH
395 WARN_ON(1);
396 } else {
397 rb_erase(rb, root);
398 RB_CLEAR_NODE(rb);
399 }
400}
401
402
1da177e4
LT
403/**
404 * htb_remove_class_from_row - removes class from its row
405 *
406 * The class is removed from row at priorities marked in mask.
407 * It does nothing if mask == 0.
408 */
87990467
SH
409static inline void htb_remove_class_from_row(struct htb_sched *q,
410 struct htb_class *cl, int mask)
1da177e4
LT
411{
412 int m = 0;
3bf72957 413
1da177e4
LT
414 while (mask) {
415 int prio = ffz(~mask);
3696f625 416
1da177e4 417 mask &= ~(1 << prio);
87990467
SH
418 if (q->ptr[cl->level][prio] == cl->node + prio)
419 htb_next_rb_node(q->ptr[cl->level] + prio);
3696f625
SH
420
421 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
87990467 422 if (!q->row[cl->level][prio].rb_node)
1da177e4
LT
423 m |= 1 << prio;
424 }
1da177e4
LT
425 q->row_mask[cl->level] &= ~m;
426}
427
/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
87990467 435static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
1da177e4
LT
436{
437 struct htb_class *p = cl->parent;
87990467 438 long m, mask = cl->prio_activity;
1da177e4
LT
439
440 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
441 m = mask;
442 while (m) {
1da177e4
LT
443 int prio = ffz(~m);
444 m &= ~(1 << prio);
87990467 445
1da177e4
LT
446 if (p->un.inner.feed[prio].rb_node)
447 /* parent already has its feed in use so that
448 reset bit in mask as parent is already ok */
449 mask &= ~(1 << prio);
87990467
SH
450
451 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
1da177e4 452 }
1da177e4 453 p->prio_activity |= mask;
87990467
SH
454 cl = p;
455 p = cl->parent;
3bf72957 456
1da177e4
LT
457 }
458 if (cl->cmode == HTB_CAN_SEND && mask)
87990467 459 htb_add_class_to_row(q, cl, mask);
1da177e4
LT
460}
461
462/**
463 * htb_deactivate_prios - remove class from feed chain
464 *
10297b99 465 * cl->cmode must represent old mode (before deactivation). It does
1da177e4
LT
466 * nothing if cl->prio_activity == 0. Class is removed from all feed
467 * chains and rows.
468 */
469static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
470{
471 struct htb_class *p = cl->parent;
87990467 472 long m, mask = cl->prio_activity;
1da177e4
LT
473
474 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
475 m = mask;
476 mask = 0;
1da177e4
LT
477 while (m) {
478 int prio = ffz(~m);
479 m &= ~(1 << prio);
87990467
SH
480
481 if (p->un.inner.ptr[prio] == cl->node + prio) {
1da177e4
LT
482 /* we are removing child which is pointed to from
483 parent feed - forget the pointer but remember
484 classid */
485 p->un.inner.last_ptr_id[prio] = cl->classid;
486 p->un.inner.ptr[prio] = NULL;
487 }
87990467 488
3696f625 489 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
87990467
SH
490
491 if (!p->un.inner.feed[prio].rb_node)
1da177e4
LT
492 mask |= 1 << prio;
493 }
3bf72957 494
1da177e4 495 p->prio_activity &= ~mask;
87990467
SH
496 cl = p;
497 p = cl->parent;
3bf72957 498
1da177e4 499 }
87990467
SH
500 if (cl->cmode == HTB_CAN_SEND && mask)
501 htb_remove_class_from_row(q, cl, mask);
1da177e4
LT
502}
503
#if HTB_HYSTERESIS
static inline long htb_lowater(const struct htb_class *cl)
{
	return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
	return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
}
#else
#define htb_lowater(cl) (0)
#define htb_hiwater(cl) (0)
#endif

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to the time difference
 * from now to the time when cl will change its state.
 * It is also worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
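
/* Illustration of the hysteresis above (only when HTB_HYSTERESIS is
   enabled): a class in HTB_CAN_SEND keeps that mode until cl->tokens falls
   below -cl->buffer, but once demoted it must recover to 0 before being
   promoted again; likewise HTB_CANT_SEND is entered only below -cl->cbuffer
   yet left only at 0. That band is what limits mode flapping. */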
/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update feed lists linkage, change mode
 * and add the class to the wait event queue if appropriate. The new mode
 * should be different from the old one and cl->pq_key has to be valid if
 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
87990467 555static void
1da177e4 556htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
87990467
SH
557{
558 enum htb_cmode new_mode = htb_class_mode(cl, diff);
1da177e4
LT
559
560 if (new_mode == cl->cmode)
87990467
SH
561 return;
562
563 if (cl->prio_activity) { /* not necessary: speed optimization */
564 if (cl->cmode != HTB_CANT_SEND)
565 htb_deactivate_prios(q, cl);
1da177e4 566 cl->cmode = new_mode;
87990467
SH
567 if (new_mode != HTB_CANT_SEND)
568 htb_activate_prios(q, cl);
569 } else
1da177e4
LT
570 cl->cmode = new_mode;
571}
572
573/**
10297b99 574 * htb_activate - inserts leaf cl into appropriate active feeds
1da177e4
LT
575 *
576 * Routine learns (new) priority of leaf and activates feed chain
577 * for the prio. It can be called on already active leaf safely.
578 * It also adds leaf into droplist.
579 */
87990467 580static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
1da177e4
LT
581{
582 BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
3bf72957 583
1da177e4
LT
584 if (!cl->prio_activity) {
585 cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
87990467
SH
586 htb_activate_prios(q, cl);
587 list_add_tail(&cl->un.leaf.drop_list,
588 q->drops + cl->un.leaf.aprio);
1da177e4
LT
589 }
590}
591
592/**
10297b99 593 * htb_deactivate - remove leaf cl from active feeds
1da177e4
LT
594 *
 * Make sure that leaf is active. In other words it can't be called
596 * with non-active leaf. It also removes class from the drop list.
597 */
87990467 598static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
1da177e4
LT
599{
600 BUG_TRAP(cl->prio_activity);
3bf72957 601
87990467 602 htb_deactivate_prios(q, cl);
1da177e4
LT
603 cl->prio_activity = 0;
604 list_del_init(&cl->un.leaf.drop_list);
605}
606
607static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
608{
87990467
SH
609 int ret;
610 struct htb_sched *q = qdisc_priv(sch);
611 struct htb_class *cl = htb_classify(skb, sch, &ret);
612
613 if (cl == HTB_DIRECT) {
614 /* enqueue to helper queue */
615 if (q->direct_queue.qlen < q->direct_qlen) {
616 __skb_queue_tail(&q->direct_queue, skb);
617 q->direct_pkts++;
618 } else {
619 kfree_skb(skb);
620 sch->qstats.drops++;
621 return NET_XMIT_DROP;
622 }
1da177e4 623#ifdef CONFIG_NET_CLS_ACT
87990467
SH
624 } else if (!cl) {
625 if (ret == NET_XMIT_BYPASS)
626 sch->qstats.drops++;
627 kfree_skb(skb);
628 return ret;
1da177e4 629#endif
87990467
SH
630 } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
631 NET_XMIT_SUCCESS) {
632 sch->qstats.drops++;
633 cl->qstats.drops++;
634 return NET_XMIT_DROP;
635 } else {
636 cl->bstats.packets++;
637 cl->bstats.bytes += skb->len;
638 htb_activate(q, cl);
639 }
640
641 sch->q.qlen++;
642 sch->bstats.packets++;
643 sch->bstats.bytes += skb->len;
644 return NET_XMIT_SUCCESS;
1da177e4
LT
645}
646
647/* TODO: requeuing packet charges it to policers again !! */
648static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
649{
87990467
SH
650 struct htb_sched *q = qdisc_priv(sch);
651 int ret = NET_XMIT_SUCCESS;
652 struct htb_class *cl = htb_classify(skb, sch, &ret);
653 struct sk_buff *tskb;
654
655 if (cl == HTB_DIRECT || !cl) {
656 /* enqueue to helper queue */
657 if (q->direct_queue.qlen < q->direct_qlen && cl) {
658 __skb_queue_head(&q->direct_queue, skb);
659 } else {
660 __skb_queue_head(&q->direct_queue, skb);
661 tskb = __skb_dequeue_tail(&q->direct_queue);
662 kfree_skb(tskb);
663 sch->qstats.drops++;
664 return NET_XMIT_CN;
665 }
666 } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
667 NET_XMIT_SUCCESS) {
668 sch->qstats.drops++;
669 cl->qstats.drops++;
670 return NET_XMIT_DROP;
671 } else
672 htb_activate(q, cl);
673
674 sch->q.qlen++;
675 sch->qstats.requeues++;
676 return NET_XMIT_SUCCESS;
1da177e4
LT
677}
678
1da177e4
LT
679#ifdef HTB_RATECM
680#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
681static void htb_rate_timer(unsigned long arg)
682{
87990467 683 struct Qdisc *sch = (struct Qdisc *)arg;
1da177e4 684 struct htb_sched *q = qdisc_priv(sch);
0cef296d
SH
685 struct hlist_node *p;
686 struct htb_class *cl;
687
1da177e4
LT
688
689 /* lock queue so that we can muck with it */
9ac961ee 690 spin_lock_bh(&sch->dev->queue_lock);
1da177e4
LT
691
692 q->rttim.expires = jiffies + HZ;
693 add_timer(&q->rttim);
694
695 /* scan and recompute one bucket at time */
87990467 696 if (++q->recmp_bucket >= HTB_HSIZE)
1da177e4 697 q->recmp_bucket = 0;
3bf72957 698
0cef296d 699 hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
87990467
SH
700 RT_GEN(cl->sum_bytes, cl->rate_bytes);
701 RT_GEN(cl->sum_packets, cl->rate_packets);
1da177e4 702 }
9ac961ee 703 spin_unlock_bh(&sch->dev->queue_lock);
1da177e4
LT
704}
705#endif
706
/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, int bytes)
{
	long toks, diff;
	enum htb_cmode old_mode;

#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
	if (toks > cl->B) toks = cl->B; \
	toks -= L2T(cl, cl->R, bytes); \
	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
	cl->T = toks

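/* HTB_ACCNT(tokens, buffer, rate) above expands roughly to: refill the
   bucket by the elapsed time "diff", cap it at cl->buffer, subtract the
   send time of this packet taken from the rate table, and clamp at just
   above -cl->mbuffer so long idle or overload periods cannot wind the
   counter arbitrarily far. */
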
730 while (cl) {
87990467 731 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
1da177e4 732 if (cl->level >= level) {
87990467
SH
733 if (cl->level == level)
734 cl->xstats.lends++;
735 HTB_ACCNT(tokens, buffer, rate);
1da177e4
LT
736 } else {
737 cl->xstats.borrows++;
87990467 738 cl->tokens += diff; /* we moved t_c; update tokens */
1da177e4 739 }
87990467 740 HTB_ACCNT(ctokens, cbuffer, ceil);
1da177e4 741 cl->t_c = q->now;
1da177e4 742
87990467
SH
743 old_mode = cl->cmode;
744 diff = 0;
745 htb_change_class_mode(q, cl, &diff);
1da177e4
LT
746 if (old_mode != cl->cmode) {
747 if (old_mode != HTB_CAN_SEND)
3696f625 748 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1da177e4 749 if (cl->cmode != HTB_CAN_SEND)
87990467 750 htb_add_to_wait_tree(q, cl, diff);
1da177e4 751 }
1da177e4
LT
752#ifdef HTB_RATECM
753 /* update rate counters */
87990467
SH
754 cl->sum_bytes += bytes;
755 cl->sum_packets++;
1da177e4
LT
756#endif
757
758 /* update byte stats except for leaves which are already updated */
759 if (cl->level) {
760 cl->bstats.bytes += bytes;
761 cl->bstats.packets++;
762 }
763 cl = cl->parent;
764 }
765}
766
/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
fb983d45 774static psched_time_t htb_do_events(struct htb_sched *q, int level)
1da177e4
LT
775{
776 int i;
3bf72957 777
1da177e4
LT
778 for (i = 0; i < 500; i++) {
779 struct htb_class *cl;
780 long diff;
30bdbe39
AM
781 struct rb_node *p = rb_first(&q->wait_pq[level]);
782
87990467
SH
783 if (!p)
784 return 0;
1da177e4
LT
785
786 cl = rb_entry(p, struct htb_class, pq_node);
fb983d45
PM
787 if (cl->pq_key > q->now)
788 return cl->pq_key;
789
3696f625 790 htb_safe_rb_erase(p, q->wait_pq + level);
87990467
SH
791 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
792 htb_change_class_mode(q, cl, &diff);
1da177e4 793 if (cl->cmode != HTB_CAN_SEND)
87990467 794 htb_add_to_wait_tree(q, cl, diff);
1da177e4
LT
795 }
796 if (net_ratelimit())
797 printk(KERN_WARNING "htb: too many events !\n");
fb983d45 798 return q->now + PSCHED_TICKS_PER_SEC / 10;
1da177e4
LT
799}
800
/* Returns class->node+prio from id-tree where class's id is >= id. NULL
   if no such one exists. */
87990467
SH
803static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
804 u32 id)
1da177e4
LT
805{
806 struct rb_node *r = NULL;
807 while (n) {
87990467
SH
808 struct htb_class *cl =
809 rb_entry(n, struct htb_class, node[prio]);
810 if (id == cl->classid)
811 return n;
812
1da177e4
LT
813 if (id > cl->classid) {
814 n = n->rb_right;
815 } else {
816 r = n;
817 n = n->rb_left;
818 }
819 }
820 return r;
821}
822
/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf the current feed pointer points to.
 */
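/* The walk below keeps an explicit per-level stack of (subtree root, feed
   pointer, saved classid): an invalidated pointer is recovered from the
   saved classid, and hitting the right edge of a feed tree rewinds that
   level to its leftmost node and then advances the pointer one level up,
   so the scan always ends at a level-0 (leaf) class. */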
87990467
SH
828static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
829 struct rb_node **pptr, u32 * pid)
1da177e4
LT
830{
831 int i;
832 struct {
833 struct rb_node *root;
834 struct rb_node **pptr;
835 u32 *pid;
87990467
SH
836 } stk[TC_HTB_MAXDEPTH], *sp = stk;
837
1da177e4
LT
838 BUG_TRAP(tree->rb_node);
839 sp->root = tree->rb_node;
840 sp->pptr = pptr;
841 sp->pid = pid;
842
843 for (i = 0; i < 65535; i++) {
87990467 844 if (!*sp->pptr && *sp->pid) {
10297b99 845 /* ptr was invalidated but id is valid - try to recover
1da177e4 846 the original or next ptr */
87990467
SH
847 *sp->pptr =
848 htb_id_find_next_upper(prio, sp->root, *sp->pid);
1da177e4 849 }
87990467
SH
850 *sp->pid = 0; /* ptr is valid now so that remove this hint as it
851 can become out of date quickly */
852 if (!*sp->pptr) { /* we are at right end; rewind & go up */
1da177e4 853 *sp->pptr = sp->root;
87990467 854 while ((*sp->pptr)->rb_left)
1da177e4
LT
855 *sp->pptr = (*sp->pptr)->rb_left;
856 if (sp > stk) {
857 sp--;
87990467
SH
858 BUG_TRAP(*sp->pptr);
859 if (!*sp->pptr)
860 return NULL;
861 htb_next_rb_node(sp->pptr);
1da177e4
LT
862 }
863 } else {
864 struct htb_class *cl;
87990467
SH
865 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
866 if (!cl->level)
1da177e4
LT
867 return cl;
868 (++sp)->root = cl->un.inner.feed[prio].rb_node;
87990467
SH
869 sp->pptr = cl->un.inner.ptr + prio;
870 sp->pid = cl->un.inner.last_ptr_id + prio;
1da177e4
LT
871 }
872 }
873 BUG_TRAP(0);
874 return NULL;
875}
876
877/* dequeues packet at given priority and level; call only if
878 you are sure that there is active class at prio/level */
87990467
SH
879static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
880 int level)
1da177e4
LT
881{
882 struct sk_buff *skb = NULL;
87990467 883 struct htb_class *cl, *start;
1da177e4 884 /* look initial class up in the row */
87990467
SH
885 start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
886 q->ptr[level] + prio,
887 q->last_ptr_id[level] + prio);
888
1da177e4
LT
889 do {
890next:
87990467
SH
891 BUG_TRAP(cl);
892 if (!cl)
893 return NULL;
1da177e4
LT
894
895 /* class can be empty - it is unlikely but can be true if leaf
896 qdisc drops packets in enqueue routine or if someone used
10297b99 897 graft operation on the leaf since last dequeue;
1da177e4
LT
898 simply deactivate and skip such class */
899 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
900 struct htb_class *next;
87990467 901 htb_deactivate(q, cl);
1da177e4
LT
902
903 /* row/level might become empty */
904 if ((q->row_mask[level] & (1 << prio)) == 0)
87990467 905 return NULL;
1da177e4 906
87990467
SH
907 next = htb_lookup_leaf(q->row[level] + prio,
908 prio, q->ptr[level] + prio,
909 q->last_ptr_id[level] + prio);
910
911 if (cl == start) /* fix start if we just deleted it */
1da177e4
LT
912 start = next;
913 cl = next;
914 goto next;
915 }
87990467
SH
916
917 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
918 if (likely(skb != NULL))
1da177e4
LT
919 break;
920 if (!cl->warned) {
87990467
SH
921 printk(KERN_WARNING
922 "htb: class %X isn't work conserving ?!\n",
923 cl->classid);
1da177e4
LT
924 cl->warned = 1;
925 }
926 q->nwc_hit++;
87990467
SH
927 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
928 ptr[0]) + prio);
929 cl = htb_lookup_leaf(q->row[level] + prio, prio,
930 q->ptr[level] + prio,
931 q->last_ptr_id[level] + prio);
1da177e4
LT
932
933 } while (cl != start);
934
935 if (likely(skb != NULL)) {
936 if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
1da177e4 937 cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
87990467
SH
938 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
939 ptr[0]) + prio);
1da177e4
LT
940 }
		/* this used to be after charge_class but this constellation
		   gives us slightly better performance */
943 if (!cl->un.leaf.q->q.qlen)
87990467
SH
944 htb_deactivate(q, cl);
945 htb_charge_class(q, cl, level, skb->len);
1da177e4
LT
946 }
947 return skb;
948}
949
1da177e4
LT
950static struct sk_buff *htb_dequeue(struct Qdisc *sch)
951{
952 struct sk_buff *skb = NULL;
953 struct htb_sched *q = qdisc_priv(sch);
954 int level;
fb983d45 955 psched_time_t next_event;
1da177e4
LT
956
957 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
87990467
SH
958 skb = __skb_dequeue(&q->direct_queue);
959 if (skb != NULL) {
1da177e4
LT
960 sch->flags &= ~TCQ_F_THROTTLED;
961 sch->q.qlen--;
962 return skb;
963 }
964
87990467
SH
965 if (!sch->q.qlen)
966 goto fin;
1da177e4
LT
967 PSCHED_GET_TIME(q->now);
968
fb983d45 969 next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
1da177e4
LT
970 q->nwc_hit = 0;
971 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
972 /* common case optimization - skip event handler quickly */
973 int m;
fb983d45
PM
974 psched_time_t event;
975
976 if (q->now >= q->near_ev_cache[level]) {
977 event = htb_do_events(q, level);
978 q->near_ev_cache[level] = event ? event :
979 PSCHED_TICKS_PER_SEC;
1da177e4 980 } else
fb983d45
PM
981 event = q->near_ev_cache[level];
982
983 if (event && next_event > event)
984 next_event = event;
87990467 985
1da177e4
LT
986 m = ~q->row_mask[level];
987 while (m != (int)(-1)) {
87990467 988 int prio = ffz(m);
1da177e4 989 m |= 1 << prio;
87990467 990 skb = htb_dequeue_tree(q, prio, level);
1da177e4
LT
991 if (likely(skb != NULL)) {
992 sch->q.qlen--;
993 sch->flags &= ~TCQ_F_THROTTLED;
994 goto fin;
995 }
996 }
997 }
	sch->qstats.overlimits++;
	qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
	return skb;
}
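
/* next_event starts at now + 5 sec and is pulled down to the earliest
   pending wait-queue event, so when everything is throttled the hrtimer
   based watchdog wakes the qdisc right at the next mode change instead of
   rounding the wait up to whole jiffies. */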
1003
1004/* try to drop from each class (by prio) until one succeed */
87990467 1005static unsigned int htb_drop(struct Qdisc *sch)
1da177e4
LT
1006{
1007 struct htb_sched *q = qdisc_priv(sch);
1008 int prio;
1009
1010 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
1011 struct list_head *p;
87990467 1012 list_for_each(p, q->drops + prio) {
1da177e4
LT
1013 struct htb_class *cl = list_entry(p, struct htb_class,
1014 un.leaf.drop_list);
1015 unsigned int len;
87990467
SH
1016 if (cl->un.leaf.q->ops->drop &&
1017 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
1da177e4
LT
1018 sch->q.qlen--;
1019 if (!cl->un.leaf.q->q.qlen)
87990467 1020 htb_deactivate(q, cl);
1da177e4
LT
1021 return len;
1022 }
1023 }
1024 }
1025 return 0;
1026}
1027
/* reset all classes */
/* always called under BH & queue lock */
87990467 1030static void htb_reset(struct Qdisc *sch)
1da177e4
LT
1031{
1032 struct htb_sched *q = qdisc_priv(sch);
1033 int i;
1da177e4
LT
1034
1035 for (i = 0; i < HTB_HSIZE; i++) {
0cef296d
SH
1036 struct hlist_node *p;
1037 struct htb_class *cl;
1038
1039 hlist_for_each_entry(cl, p, q->hash + i, hlist) {
1da177e4 1040 if (cl->level)
87990467 1041 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
1da177e4 1042 else {
87990467 1043 if (cl->un.leaf.q)
1da177e4
LT
1044 qdisc_reset(cl->un.leaf.q);
1045 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1046 }
1047 cl->prio_activity = 0;
1048 cl->cmode = HTB_CAN_SEND;
1da177e4
LT
1049
1050 }
1051 }
fb983d45 1052 qdisc_watchdog_cancel(&q->watchdog);
1da177e4
LT
1053 __skb_queue_purge(&q->direct_queue);
1054 sch->q.qlen = 0;
87990467
SH
1055 memset(q->row, 0, sizeof(q->row));
1056 memset(q->row_mask, 0, sizeof(q->row_mask));
1057 memset(q->wait_pq, 0, sizeof(q->wait_pq));
1058 memset(q->ptr, 0, sizeof(q->ptr));
1da177e4 1059 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 1060 INIT_LIST_HEAD(q->drops + i);
1da177e4
LT
1061}
1062
1063static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1064{
1065 struct htb_sched *q = qdisc_priv(sch);
1066 struct rtattr *tb[TCA_HTB_INIT];
1067 struct tc_htb_glob *gopt;
1068 int i;
1da177e4 1069 if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
87990467
SH
1070 tb[TCA_HTB_INIT - 1] == NULL ||
1071 RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
1da177e4
LT
1072 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1073 return -EINVAL;
1074 }
87990467 1075 gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
1da177e4 1076 if (gopt->version != HTB_VER >> 16) {
87990467
SH
1077 printk(KERN_ERR
1078 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1079 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1da177e4
LT
1080 return -EINVAL;
1081 }
1da177e4
LT
1082
1083 INIT_LIST_HEAD(&q->root);
1084 for (i = 0; i < HTB_HSIZE; i++)
0cef296d 1085 INIT_HLIST_HEAD(q->hash + i);
1da177e4 1086 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 1087 INIT_LIST_HEAD(q->drops + i);
1da177e4 1088
fb983d45 1089 qdisc_watchdog_init(&q->watchdog, sch);
1da177e4
LT
1090 skb_queue_head_init(&q->direct_queue);
1091
1092 q->direct_qlen = sch->dev->tx_queue_len;
87990467 1093 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1da177e4 1094 q->direct_qlen = 2;
1da177e4
LT
1095
1096#ifdef HTB_RATECM
1097 init_timer(&q->rttim);
1098 q->rttim.function = htb_rate_timer;
1099 q->rttim.data = (unsigned long)sch;
1100 q->rttim.expires = jiffies + HZ;
1101 add_timer(&q->rttim);
1102#endif
1103 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1104 q->rate2quantum = 1;
1105 q->defcls = gopt->defcls;
1106
1107 return 0;
1108}
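
/* Rough setup sketch matching the parsing above (device name and rates are
 * made up):
 *	tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 10mbit
 *	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 2mbit ceil 10mbit
 * tc passes TCA_HTB_INIT carrying its protocol version, defcls and
 * rate2quantum; a version mismatch against HTB_VER is rejected above.
 */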
1109
1110static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1111{
1112 struct htb_sched *q = qdisc_priv(sch);
87990467 1113 unsigned char *b = skb->tail;
1da177e4
LT
1114 struct rtattr *rta;
1115 struct tc_htb_glob gopt;
9ac961ee 1116 spin_lock_bh(&sch->dev->queue_lock);
1da177e4
LT
1117 gopt.direct_pkts = q->direct_pkts;
1118
1da177e4
LT
1119 gopt.version = HTB_VER;
1120 gopt.rate2quantum = q->rate2quantum;
1121 gopt.defcls = q->defcls;
3bf72957 1122 gopt.debug = 0;
87990467 1123 rta = (struct rtattr *)b;
1da177e4
LT
1124 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1125 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1126 rta->rta_len = skb->tail - b;
9ac961ee 1127 spin_unlock_bh(&sch->dev->queue_lock);
1da177e4
LT
1128 return skb->len;
1129rtattr_failure:
9ac961ee 1130 spin_unlock_bh(&sch->dev->queue_lock);
1da177e4
LT
1131 skb_trim(skb, skb->tail - skb->data);
1132 return -1;
1133}
1134
1135static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
87990467 1136 struct sk_buff *skb, struct tcmsg *tcm)
1da177e4 1137{
87990467
SH
1138 struct htb_class *cl = (struct htb_class *)arg;
1139 unsigned char *b = skb->tail;
1da177e4
LT
1140 struct rtattr *rta;
1141 struct tc_htb_opt opt;
1142
9ac961ee 1143 spin_lock_bh(&sch->dev->queue_lock);
1da177e4
LT
1144 tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
1145 tcm->tcm_handle = cl->classid;
1146 if (!cl->level && cl->un.leaf.q)
1147 tcm->tcm_info = cl->un.leaf.q->handle;
1148
87990467 1149 rta = (struct rtattr *)b;
1da177e4
LT
1150 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1151
87990467 1152 memset(&opt, 0, sizeof(opt));
1da177e4 1153
87990467
SH
1154 opt.rate = cl->rate->rate;
1155 opt.buffer = cl->buffer;
1156 opt.ceil = cl->ceil->rate;
1157 opt.cbuffer = cl->cbuffer;
1158 opt.quantum = cl->un.leaf.quantum;
1159 opt.prio = cl->un.leaf.prio;
1160 opt.level = cl->level;
1da177e4
LT
1161 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1162 rta->rta_len = skb->tail - b;
9ac961ee 1163 spin_unlock_bh(&sch->dev->queue_lock);
1da177e4
LT
1164 return skb->len;
1165rtattr_failure:
9ac961ee 1166 spin_unlock_bh(&sch->dev->queue_lock);
1da177e4
LT
1167 skb_trim(skb, b - skb->data);
1168 return -1;
1169}
1170
1171static int
87990467 1172htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1da177e4 1173{
87990467 1174 struct htb_class *cl = (struct htb_class *)arg;
1da177e4
LT
1175
1176#ifdef HTB_RATECM
87990467
SH
1177 cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
1178 cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
1da177e4
LT
1179#endif
1180
1181 if (!cl->level && cl->un.leaf.q)
1182 cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1183 cl->xstats.tokens = cl->tokens;
1184 cl->xstats.ctokens = cl->ctokens;
1185
1186 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1187 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1188 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1189 return -1;
1190
1191 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1192}
1193
1194static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
87990467 1195 struct Qdisc **old)
1da177e4 1196{
87990467 1197 struct htb_class *cl = (struct htb_class *)arg;
1da177e4
LT
1198
1199 if (cl && !cl->level) {
9f9afec4
PM
1200 if (new == NULL &&
1201 (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
10297b99 1202 cl->classid))
87990467
SH
1203 == NULL)
1204 return -ENOBUFS;
1da177e4
LT
1205 sch_tree_lock(sch);
1206 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
256d61b8 1207 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1da177e4
LT
1208 qdisc_reset(*old);
1209 }
1210 sch_tree_unlock(sch);
1211 return 0;
1212 }
1213 return -ENOENT;
1214}
1215
87990467 1216static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1217{
87990467 1218 struct htb_class *cl = (struct htb_class *)arg;
1da177e4
LT
1219 return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1220}
1221
256d61b8
PM
1222static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1223{
1224 struct htb_class *cl = (struct htb_class *)arg;
1225
1226 if (cl->un.leaf.q->q.qlen == 0)
1227 htb_deactivate(qdisc_priv(sch), cl);
1228}
1229
1da177e4
LT
1230static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1231{
87990467
SH
1232 struct htb_class *cl = htb_find(classid, sch);
1233 if (cl)
1da177e4
LT
1234 cl->refcnt++;
1235 return (unsigned long)cl;
1236}
1237
1238static void htb_destroy_filters(struct tcf_proto **fl)
1239{
1240 struct tcf_proto *tp;
1241
1242 while ((tp = *fl) != NULL) {
1243 *fl = tp->next;
1244 tcf_destroy(tp);
1245 }
1246}
1247
160d5e10
JP
1248static inline int htb_parent_last_child(struct htb_class *cl)
1249{
1250 if (!cl->parent)
1251 /* the root class */
1252 return 0;
1253
1254 if (!(cl->parent->children.next == &cl->sibling &&
1255 cl->parent->children.prev == &cl->sibling))
1256 /* not the last child */
1257 return 0;
1258
1259 return 1;
1260}
1261
1262static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
1263{
1264 struct htb_class *parent = cl->parent;
1265
1266 BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
1267
1268 parent->level = 0;
1269 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1270 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1271 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1272 parent->un.leaf.quantum = parent->quantum;
1273 parent->un.leaf.prio = parent->prio;
1274 parent->tokens = parent->buffer;
1275 parent->ctokens = parent->cbuffer;
1276 PSCHED_GET_TIME(parent->t_c);
1277 parent->cmode = HTB_CAN_SEND;
1278}
1279
87990467 1280static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1da177e4
LT
1281{
1282 struct htb_sched *q = qdisc_priv(sch);
814a175e 1283
1da177e4
LT
1284 if (!cl->level) {
1285 BUG_TRAP(cl->un.leaf.q);
1da177e4
LT
1286 qdisc_destroy(cl->un.leaf.q);
1287 }
1288 qdisc_put_rtab(cl->rate);
1289 qdisc_put_rtab(cl->ceil);
87990467
SH
1290
1291 htb_destroy_filters(&cl->filter_list);
1292
1293 while (!list_empty(&cl->children))
1294 htb_destroy_class(sch, list_entry(cl->children.next,
1295 struct htb_class, sibling));
1da177e4
LT
1296
1297 /* note: this delete may happen twice (see htb_delete) */
da33e3eb 1298 hlist_del_init(&cl->hlist);
1da177e4 1299 list_del(&cl->sibling);
87990467 1300
1da177e4 1301 if (cl->prio_activity)
87990467
SH
1302 htb_deactivate(q, cl);
1303
1da177e4 1304 if (cl->cmode != HTB_CAN_SEND)
3696f625 1305 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
87990467 1306
1da177e4
LT
1307 kfree(cl);
1308}
1309
/* always called under BH & queue lock */
87990467 1311static void htb_destroy(struct Qdisc *sch)
1da177e4
LT
1312{
1313 struct htb_sched *q = qdisc_priv(sch);
1da177e4 1314
fb983d45 1315 qdisc_watchdog_cancel(&q->watchdog);
1da177e4 1316#ifdef HTB_RATECM
87990467 1317 del_timer_sync(&q->rttim);
1da177e4
LT
1318#endif
1319 /* This line used to be after htb_destroy_class call below
10297b99 1320 and surprisingly it worked in 2.4. But it must precede it
1da177e4
LT
1321 because filter need its target class alive to be able to call
1322 unbind_filter on it (without Oops). */
1323 htb_destroy_filters(&q->filter_list);
87990467
SH
1324
1325 while (!list_empty(&q->root))
1326 htb_destroy_class(sch, list_entry(q->root.next,
1327 struct htb_class, sibling));
1da177e4
LT
1328
1329 __skb_queue_purge(&q->direct_queue);
1330}
1331
1332static int htb_delete(struct Qdisc *sch, unsigned long arg)
1333{
1334 struct htb_sched *q = qdisc_priv(sch);
87990467 1335 struct htb_class *cl = (struct htb_class *)arg;
256d61b8 1336 unsigned int qlen;
160d5e10
JP
1337 struct Qdisc *new_q = NULL;
1338 int last_child = 0;
1da177e4
LT
1339
	// TODO: why don't we allow deleting a subtree? references? does
	// tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
1343 if (!list_empty(&cl->children) || cl->filter_cnt)
1344 return -EBUSY;
87990467 1345
160d5e10
JP
1346 if (!cl->level && htb_parent_last_child(cl)) {
1347 new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1348 cl->parent->classid);
1349 last_child = 1;
1350 }
1351
1da177e4 1352 sch_tree_lock(sch);
87990467 1353
814a175e 1354 if (!cl->level) {
256d61b8 1355 qlen = cl->un.leaf.q->q.qlen;
814a175e 1356 qdisc_reset(cl->un.leaf.q);
256d61b8 1357 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
814a175e
PM
1358 }
1359
c38c83cb
PM
1360 /* delete from hash and active; remainder in destroy_class */
1361 hlist_del_init(&cl->hlist);
1362
1da177e4 1363 if (cl->prio_activity)
87990467 1364 htb_deactivate(q, cl);
1da177e4 1365
160d5e10
JP
1366 if (last_child)
1367 htb_parent_to_leaf(cl, new_q);
1368
1da177e4 1369 if (--cl->refcnt == 0)
87990467 1370 htb_destroy_class(sch, cl);
1da177e4
LT
1371
1372 sch_tree_unlock(sch);
1373 return 0;
1374}
1375
1376static void htb_put(struct Qdisc *sch, unsigned long arg)
1377{
87990467 1378 struct htb_class *cl = (struct htb_class *)arg;
1da177e4
LT
1379
1380 if (--cl->refcnt == 0)
87990467 1381 htb_destroy_class(sch, cl);
1da177e4
LT
1382}
1383
87990467
SH
1384static int htb_change_class(struct Qdisc *sch, u32 classid,
1385 u32 parentid, struct rtattr **tca,
1386 unsigned long *arg)
1da177e4
LT
1387{
1388 int err = -EINVAL;
1389 struct htb_sched *q = qdisc_priv(sch);
87990467
SH
1390 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1391 struct rtattr *opt = tca[TCA_OPTIONS - 1];
1da177e4
LT
1392 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1393 struct rtattr *tb[TCA_HTB_RTAB];
1394 struct tc_htb_opt *hopt;
1395
1396 /* extract all subattrs from opt attr */
1397 if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
87990467
SH
1398 tb[TCA_HTB_PARMS - 1] == NULL ||
1399 RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
1da177e4 1400 goto failure;
1da177e4 1401
87990467
SH
1402 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1403
1404 hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
3bf72957 1405
87990467
SH
1406 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
1407 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
1408 if (!rtab || !ctab)
1409 goto failure;
1da177e4 1410
87990467 1411 if (!cl) { /* new class */
1da177e4 1412 struct Qdisc *new_q;
3696f625
SH
1413 int prio;
1414
1da177e4 1415 /* check for valid classid */
87990467
SH
1416 if (!classid || TC_H_MAJ(classid ^ sch->handle)
1417 || htb_find(classid, sch))
1da177e4
LT
1418 goto failure;
1419
1420 /* check maximal depth */
1421 if (parent && parent->parent && parent->parent->level < 2) {
1422 printk(KERN_ERR "htb: tree is too deep\n");
1423 goto failure;
1424 }
1425 err = -ENOBUFS;
0da974f4 1426 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1da177e4 1427 goto failure;
87990467 1428
1da177e4
LT
1429 cl->refcnt = 1;
1430 INIT_LIST_HEAD(&cl->sibling);
0cef296d 1431 INIT_HLIST_NODE(&cl->hlist);
1da177e4
LT
1432 INIT_LIST_HEAD(&cl->children);
1433 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
3696f625
SH
1434 RB_CLEAR_NODE(&cl->pq_node);
1435
1436 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1437 RB_CLEAR_NODE(&cl->node[prio]);
1da177e4
LT
1438
1439 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1440 so that can't be used inside of sch_tree_lock
1441 -- thanks to Karlis Peisenieks */
9f9afec4 1442 new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
1da177e4
LT
1443 sch_tree_lock(sch);
1444 if (parent && !parent->level) {
256d61b8
PM
1445 unsigned int qlen = parent->un.leaf.q->q.qlen;
1446
1da177e4 1447 /* turn parent into inner node */
256d61b8
PM
1448 qdisc_reset(parent->un.leaf.q);
1449 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
87990467
SH
1450 qdisc_destroy(parent->un.leaf.q);
1451 if (parent->prio_activity)
1452 htb_deactivate(q, parent);
1da177e4
LT
1453
1454 /* remove from evt list because of level change */
1455 if (parent->cmode != HTB_CAN_SEND) {
3696f625 1456 htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1da177e4
LT
1457 parent->cmode = HTB_CAN_SEND;
1458 }
1459 parent->level = (parent->parent ? parent->parent->level
87990467
SH
1460 : TC_HTB_MAXDEPTH) - 1;
1461 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1da177e4
LT
1462 }
1463 /* leaf (we) needs elementary qdisc */
1464 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1465
87990467
SH
1466 cl->classid = classid;
1467 cl->parent = parent;
1da177e4
LT
1468
1469 /* set class to be in HTB_CAN_SEND state */
1470 cl->tokens = hopt->buffer;
1471 cl->ctokens = hopt->cbuffer;
87990467 1472 cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */
1da177e4
LT
1473 PSCHED_GET_TIME(cl->t_c);
1474 cl->cmode = HTB_CAN_SEND;
1475
1476 /* attach to the hash list and parent's family */
0cef296d 1477 hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
87990467
SH
1478 list_add_tail(&cl->sibling,
1479 parent ? &parent->children : &q->root);
1480 } else
1481 sch_tree_lock(sch);
1da177e4
LT
1482
	/* it used to be a nasty bug here, we have to check that node
	   is really a leaf before changing cl->un.leaf ! */
	if (!cl->level) {
		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 1000;
		}
		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
			printk(KERN_WARNING
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->classid);
			cl->un.leaf.quantum = 200000;
		}
		if (hopt->quantum)
			cl->un.leaf.quantum = hopt->quantum;
		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;

		/* backup for htb_parent_to_leaf */
		cl->quantum = cl->un.leaf.quantum;
		cl->prio = cl->un.leaf.prio;
	}
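
	/* Rough arithmetic for the quantum defaults above (rates are made
	   up): a leaf with rate 100kbit is 12500 bytes/s, so with tc's usual
	   r2q of 10 its quantum becomes 1250 bytes; rates below about 80kbit
	   would compute less than 1000 and are clamped with the "is small"
	   warning, and rates above 16mbit exceed 200000 and are clamped the
	   other way, unless an explicit quantum is supplied. */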
1508
1509 cl->buffer = hopt->buffer;
1510 cl->cbuffer = hopt->cbuffer;
87990467
SH
1511 if (cl->rate)
1512 qdisc_put_rtab(cl->rate);
1513 cl->rate = rtab;
1514 if (cl->ceil)
1515 qdisc_put_rtab(cl->ceil);
1516 cl->ceil = ctab;
1da177e4
LT
1517 sch_tree_unlock(sch);
1518
1519 *arg = (unsigned long)cl;
1520 return 0;
1521
1522failure:
87990467
SH
1523 if (rtab)
1524 qdisc_put_rtab(rtab);
1525 if (ctab)
1526 qdisc_put_rtab(ctab);
1da177e4
LT
1527 return err;
1528}
1529
1530static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1531{
1532 struct htb_sched *q = qdisc_priv(sch);
1533 struct htb_class *cl = (struct htb_class *)arg;
1534 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
3bf72957 1535
1da177e4
LT
1536 return fl;
1537}
1538
1539static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
87990467 1540 u32 classid)
1da177e4
LT
1541{
1542 struct htb_sched *q = qdisc_priv(sch);
87990467 1543 struct htb_class *cl = htb_find(classid, sch);
3bf72957 1544
1da177e4 1545 /*if (cl && !cl->level) return 0;
87990467
SH
1546 The line above used to be there to prevent attaching filters to
1547 leaves. But at least tc_index filter uses this just to get class
1548 for other reasons so that we have to allow for it.
1549 ----
1550 19.6.2002 As Werner explained it is ok - bind filter is just
1551 another way to "lock" the class - unlike "get" this lock can
1552 be broken by class during destroy IIUC.
1da177e4 1553 */
87990467
SH
1554 if (cl)
1555 cl->filter_cnt++;
1556 else
1da177e4
LT
1557 q->filter_cnt++;
1558 return (unsigned long)cl;
1559}
1560
1561static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1562{
1563 struct htb_sched *q = qdisc_priv(sch);
1564 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 1565
87990467
SH
1566 if (cl)
1567 cl->filter_cnt--;
1568 else
1da177e4
LT
1569 q->filter_cnt--;
1570}
1571
1572static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1573{
1574 struct htb_sched *q = qdisc_priv(sch);
1575 int i;
1576
1577 if (arg->stop)
1578 return;
1579
1580 for (i = 0; i < HTB_HSIZE; i++) {
0cef296d
SH
1581 struct hlist_node *p;
1582 struct htb_class *cl;
1583
1584 hlist_for_each_entry(cl, p, q->hash + i, hlist) {
1da177e4
LT
1585 if (arg->count < arg->skip) {
1586 arg->count++;
1587 continue;
1588 }
1589 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1590 arg->stop = 1;
1591 return;
1592 }
1593 arg->count++;
1594 }
1595 }
1596}
1597
1598static struct Qdisc_class_ops htb_class_ops = {
1599 .graft = htb_graft,
1600 .leaf = htb_leaf,
256d61b8 1601 .qlen_notify = htb_qlen_notify,
1da177e4
LT
1602 .get = htb_get,
1603 .put = htb_put,
1604 .change = htb_change_class,
1605 .delete = htb_delete,
1606 .walk = htb_walk,
1607 .tcf_chain = htb_find_tcf,
1608 .bind_tcf = htb_bind_filter,
1609 .unbind_tcf = htb_unbind_filter,
1610 .dump = htb_dump_class,
1611 .dump_stats = htb_dump_class_stats,
1612};
1613
1614static struct Qdisc_ops htb_qdisc_ops = {
1615 .next = NULL,
1616 .cl_ops = &htb_class_ops,
1617 .id = "htb",
1618 .priv_size = sizeof(struct htb_sched),
1619 .enqueue = htb_enqueue,
1620 .dequeue = htb_dequeue,
1621 .requeue = htb_requeue,
1622 .drop = htb_drop,
1623 .init = htb_init,
1624 .reset = htb_reset,
1625 .destroy = htb_destroy,
1626 .change = NULL /* htb_change */,
1627 .dump = htb_dump,
1628 .owner = THIS_MODULE,
1629};
1630
1631static int __init htb_module_init(void)
1632{
87990467 1633 return register_qdisc(&htb_qdisc_ops);
1da177e4 1634}
87990467 1635static void __exit htb_module_exit(void)
1da177e4 1636{
87990467 1637 unregister_qdisc(&htb_qdisc_ops);
1da177e4 1638}
87990467 1639
1da177e4
LT
1640module_init(htb_module_init)
1641module_exit(htb_module_exit)
1642MODULE_LICENSE("GPL");