/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *	and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/

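/* Illustrative example (not part of the original sources): with
 * TC_HTB_MAXDEPTH == 8, a configuration such as
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 30mbit ceil 100mbit
 *
 * ends up with 1:1 as a root-level interior class (level TC_HTB_MAXDEPTH-1
 * once it has a child) and 1:20 as a leaf at level 0 that may borrow up to
 * 100mbit from 1:1.
 */
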
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

struct htb_rate_cfg {
	u64 rate_bps;
	u32 mult;
	u32 shift;
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	struct Qdisc_class_common common;
	/* general class parameters */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

	/* topology */
	int level;		/* our level (see above) */
	unsigned int children;
	struct htb_class *parent;	/* parent class */

	int prio;		/* these two are used only by leaves... */
	int quantum;		/* but stored for parent-to-leaf return */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects from
			 * parent's feed then we lose the ptr value and start from
			 * the first child again. Here we store classid of the
			 * last valid ptr (used when ptr is NULL).
			 */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	/* token bucket parameters */
	struct htb_rate_cfg rate;
	struct htb_rate_cfg ceil;
	s64 buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	s64 tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int warned;	/* only one warning */
	struct work_struct work;
};

static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len)
{
	return ((u64)len * r->mult) >> r->shift;
}

static void htb_precompute_ratedata(struct htb_rate_cfg *r)
{
	u64 factor;
	u64 mult;
	int shift;

	r->shift = 0;
	r->mult = 1;
	/*
	 * Calibrate mult, shift so that token counting is accurate
	 * for smallest packet size (64 bytes). Token (time in ns) is
	 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will
	 * work as long as the smallest packet transfer time can be
	 * accurately represented in nanosec.
	 */
	if (r->rate_bps > 0) {
		/*
		 * Higher shift gives better accuracy. Find the largest
		 * shift such that mult fits in 32 bits.
		 */
		for (shift = 0; shift < 16; shift++) {
			r->shift = shift;
			factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
			mult = div64_u64(factor, r->rate_bps);
			if (mult > UINT_MAX)
				break;
		}

		r->shift = shift - 1;
		factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
		r->mult = div64_u64(factor, r->rate_bps);
	}
}

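/* Worked example (illustrative, not from the original sources): for
 * rate_bps = 10^9 (1 Gbit/s) the loop above keeps the largest shift < 16
 * for which mult = 8 * NSEC_PER_SEC * 2^shift / rate_bps still fits in
 * 32 bits, i.e. shift = 15 and mult = 262144.  l2t_ns() then yields
 * (len * 262144) >> 15 == len * 8 ns, the wire time of a len-byte packet
 * at 1 Gbit/s (12 us for a 1500 byte frame).
 */
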
/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If that is still
 * unsuccessful then we finish and return the direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1L)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	 * note that nfmark can be used too by attaching filter fw with no
	 * rules in it
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}


/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating on. cl->cmode must be new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use so that
				 * reset bit in mask as parent is already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing child which is pointed to from
				 * parent feed - forget the pointer but remember
				 * classid
				 */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}

static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}


/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

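/* Example of the hysteresis (illustrative, not from the original sources):
 * with htb_hysteresis enabled, a class that is currently HTB_CAN_SEND only
 * leaves that mode once its tokens drop below -cl->buffer (htb_hiwater),
 * and only enters HTB_CANT_SEND once its ctokens drop below -cl->cbuffer
 * (htb_lowater), so small oscillations around zero do not rebuild the
 * feed trees on every packet.
 */
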
/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events that have cl->pq_key <= q->now.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level,
				   unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warning("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
 * if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
		    rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->un.leaf.q);
		htb_next_rb_node((level ? cl->parent->un.inner.ptr :
				  q->ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
					  q->ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

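/* Illustrative note (not from the original sources): the deficit handling
 * above is plain DRR - a leaf keeps the round-robin pointer until it has
 * sent roughly cl->quantum bytes (its deficit goes negative), then the
 * pointer advances and the deficit is replenished by cl->quantum, so e.g.
 * a leaf with quantum 1500 sends about one full-size frame per round.
 */
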
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_unthrottled(sch);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_to_ns(ktime_get());
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	sch->qstats.overlimits++;
	if (likely(next_event > q->now)) {
		if (!test_bit(__QDISC_STATE_DEACTIVATED,
			      &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
			ktime_t time = ns_to_ktime(next_event);
			qdisc_throttled(q->watchdog.qdisc);
			hrtimer_start(&q->watchdog.timer, time,
				      HRTIMER_MODE_ABS);
		}
	} else {
		schedule_work(&q->work);
	}
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;

		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	__netif_schedule(qdisc_root(sch));
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		pr_err("HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate.rate = cl->rate.rate_bps >> 3;
	opt.buffer = cl->buffer;
	opt.ceil.rate = cl->ceil.rate_bps >> 3;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->level)
		return -EINVAL;
	if (new == NULL &&
	    (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				     cl->common.classid)) == NULL)
		return -ENOBUFS;

	sch_tree_lock(sch);
	*old = cl->un.leaf.q;
	cl->un.leaf.q = new;
	if (*old != NULL) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);
	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because filters need their target class alive to be able to call
	 * unbind_filter on it (without an Oops).
	 */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references? does
	// the tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[__TCA_HTB_MAX];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);
	if (!hopt->rate.rate || !hopt->ceil.rate)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE] ? : &est.nla);
		if (err) {
			kfree(cl);
			goto failure;
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so that can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		new_q = qdisc_create_dflt(sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
		cl->t_c = psched_get_time();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		sch_tree_lock(sch);
	}

	/* it used to be a nasty bug here, we have to check that node
	 * is really leaf before changing cl->un.leaf !
	 */
	if (!cl->level) {
		cl->quantum = hopt->rate.rate / q->rate2quantum;
		if (!hopt->quantum && cl->quantum < 1000) {
			pr_warning(
			       "HTB: quantum of class %X is small. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			pr_warning(
			       "HTB: quantum of class %X is big. Consider r2q change.\n",
			       cl->common.classid);
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}

	cl->buffer = hopt->buffer;
	cl->cbuffer = hopt->cbuffer;

	cl->rate.rate_bps = (u64)hopt->rate.rate << 3;
	cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3;

	htb_precompute_ratedata(&cl->rate);
	htb_precompute_ratedata(&cl->ceil);

	cl->buffer = hopt->buffer << PSCHED_SHIFT;
	cl->cbuffer = hopt->cbuffer << PSCHED_SHIFT;

	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	return err;
}

static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

	return fl;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	 * The line above used to be there to prevent attaching filters to
	 * leaves. But at least tc_index filter uses this just to get class
	 * for other reasons so that we have to allow for it.
	 * ----
	 * 19.6.2002 As Werner explained it is ok - bind filter is just
	 * another way to "lock" the class - unlike "get" this lock can
	 * be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		=	htb_graft,
	.leaf		=	htb_leaf,
	.qlen_notify	=	htb_qlen_notify,
	.get		=	htb_get,
	.put		=	htb_put,
	.change		=	htb_change_class,
	.delete		=	htb_delete,
	.walk		=	htb_walk,
	.tcf_chain	=	htb_find_tcf,
	.bind_tcf	=	htb_bind_filter,
	.unbind_tcf	=	htb_unbind_filter,
	.dump		=	htb_dump_class,
	.dump_stats	=	htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.cl_ops		=	&htb_class_ops,
	.id		=	"htb",
	.priv_size	=	sizeof(struct htb_sched),
	.enqueue	=	htb_enqueue,
	.dequeue	=	htb_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	htb_drop,
	.init		=	htb_init,
	.reset		=	htb_reset,
	.destroy	=	htb_destroy,
	.dump		=	htb_dump,
	.owner		=	THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");