pkt_sched: sch_hfsc: sch_htb: Add non-work-conserving warning handler.
1 /*
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Martin Devera, <devik@cdi.cz>
10 *
11 * Credits (in time order) for older HTB versions:
12 * Stef Coene <stef.coene@docum.org>
13 * HTB support at LARTC mailing list
14 * Ondrej Kraus, <krauso@barr.cz>
15 * found missing INIT_QDISC(htb)
16 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17 * helped a lot to locate nasty class stall bug
18 * Andi Kleen, Jamal Hadi, Bert Hubert
19 * code review and helpful comments on shaping
20 * Tomasz Wrona, <tw@eter.tym.pl>
21 * created test case so that I was able to fix nasty bug
22 * Wilfried Weissmann
23 * spotted bug in dequeue code and helped with fix
24 * Jiri Fojtasek
25 * fixed requeue routine
26 * and many others. thanks.
27 */
28 #include <linux/module.h>
29 #include <linux/moduleparam.h>
30 #include <linux/types.h>
31 #include <linux/kernel.h>
32 #include <linux/string.h>
33 #include <linux/errno.h>
34 #include <linux/skbuff.h>
35 #include <linux/list.h>
36 #include <linux/compiler.h>
37 #include <linux/rbtree.h>
38 #include <net/netlink.h>
39 #include <net/pkt_sched.h>
40
41 /* HTB algorithm.
42 Author: devik@cdi.cz
43 ========================================================================
44 HTB is like TBF with multiple classes. It is also similar to CBQ because
45 it allows assigning a priority to each class in the hierarchy.
46 In fact it is another implementation of Floyd's formal sharing.
47
48 Levels:
49 Each class is assigned a level. A leaf ALWAYS has level 0 and root
50 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51 one less than their parent.
52 */
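/* Editor's illustrative note (not part of the original source): with a
 * TC_HTB_MAXDEPTH of 8, a setup such as the hypothetical
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit ceil 100mbit
 *
 * would give the root class 1:1 level 7 once it gets a child, while the
 * leaf 1:10 sits at level 0; interior classes in a deeper tree take the
 * level one below their parent, so the levels along a branch need not be
 * contiguous. (Device name and rates are made up for illustration.)
 */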
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011 /* major must match the number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
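/* Editor's usage sketch (not part of the original source; paths assume the
 * scheduler is built as the sch_htb module): the parameter can be set at
 * load time with
 *
 *   modprobe sch_htb htb_hysteresis=1
 *
 * or flipped at runtime through the sysfs file that the 0640 permission
 * above exposes:
 *
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */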
65 /* used internally to keep the status of a single class */
66 enum htb_cmode {
67 HTB_CANT_SEND, /* class can't send and can't borrow */
68 HTB_MAY_BORROW, /* class can't send but may borrow */
69 HTB_CAN_SEND /* class can send */
70 };
71
72 /* interior & leaf nodes; props specific to leaves are marked L: */
73 struct htb_class {
74 struct Qdisc_class_common common;
75 /* general class parameters */
76 struct gnet_stats_basic bstats;
77 struct gnet_stats_queue qstats;
78 struct gnet_stats_rate_est rate_est;
79 struct tc_htb_xstats xstats; /* our special stats */
80 int refcnt; /* usage count of this class */
81
82 /* topology */
83 int level; /* our level (see above) */
84 unsigned int children;
85 struct htb_class *parent; /* parent class */
86
87 int prio; /* these two are used only by leaves... */
88 int quantum; /* but stored for parent-to-leaf return */
89
90 union {
91 struct htb_class_leaf {
92 struct Qdisc *q;
93 int deficit[TC_HTB_MAXDEPTH];
94 struct list_head drop_list;
95 } leaf;
96 struct htb_class_inner {
97 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
98 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
99 /* When a class changes from state 1->2 and disconnects from its
100 parent's feed, we lose the ptr value and start from the
101 first child again. Here we store the classid of the
102 last valid ptr (used when ptr is NULL). */
103 u32 last_ptr_id[TC_HTB_NUMPRIO];
104 } inner;
105 } un;
106 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
107 struct rb_node pq_node; /* node for event queue */
108 psched_time_t pq_key;
109
110 int prio_activity; /* for which prios are we active */
111 enum htb_cmode cmode; /* current mode of the class */
112
113 /* class attached filters */
114 struct tcf_proto *filter_list;
115 int filter_cnt;
116
117 /* token bucket parameters */
118 struct qdisc_rate_table *rate; /* rate table of the class itself */
119 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
120 long buffer, cbuffer; /* token bucket depth/rate */
121 psched_tdiff_t mbuffer; /* max wait time */
122 long tokens, ctokens; /* current number of tokens */
123 psched_time_t t_c; /* checkpoint time */
124 };
125
126 struct htb_sched {
127 struct Qdisc_class_hash clhash;
128 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
129
130 /* self list - roots of self generating tree */
131 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
132 int row_mask[TC_HTB_MAXDEPTH];
133 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
134 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
135
136 /* self wait list - roots of wait PQs per row */
137 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
138
139 /* time of nearest event per level (row) */
140 psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
141
142 int defcls; /* class where unclassified flows go to */
143
144 /* filters for qdisc itself */
145 struct tcf_proto *filter_list;
146
147 int rate2quantum; /* quant = rate / rate2quantum */
148 psched_time_t now; /* cached dequeue time */
149 struct qdisc_watchdog watchdog;
150
151 /* non-shaped skbs; let them go directly through */
152 struct sk_buff_head direct_queue;
153 int direct_qlen; /* max qlen of above */
154
155 long direct_pkts;
156 };
157
158 /* find class in global hash table using given handle */
159 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
160 {
161 struct htb_sched *q = qdisc_priv(sch);
162 struct Qdisc_class_common *clc;
163
164 clc = qdisc_class_find(&q->clhash, handle);
165 if (clc == NULL)
166 return NULL;
167 return container_of(clc, struct htb_class, common);
168 }
169
170 /**
171 * htb_classify - classify a packet into class
172 *
173 * It returns NULL if the packet should be dropped or -1 if the packet
174 * should be passed directly through. In all other cases a leaf class is returned.
175 * We allow direct class selection by a classid in skb->priority. Then we examine
176 * filters in the qdisc and in inner nodes (if a higher filter points to an inner
177 * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
178 * internal fifo (direct). These packets then go directly through. If we still
179 * have no valid leaf we try to use the MAJOR:default leaf. If that is still
180 * unsuccessful we finish and return the direct queue.
181 */
182 #define HTB_DIRECT (struct htb_class*)-1
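/* Editor's illustrative example (not part of the original source, classids
 * made up): with "tc qdisc add dev eth0 root handle 1: htb default 20", a
 * socket that sets its priority to 0x10010 (classid 1:10) is mapped straight
 * to that leaf by the skb->priority check below; priority 0x10000 (1:0, the
 * qdisc's own handle) selects HTB_DIRECT; packets the filters cannot resolve
 * fall back to 1:20, and if 1:20 is missing or is an inner class they also
 * go to the direct queue. */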
183
184 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
185 int *qerr)
186 {
187 struct htb_sched *q = qdisc_priv(sch);
188 struct htb_class *cl;
189 struct tcf_result res;
190 struct tcf_proto *tcf;
191 int result;
192
193 /* allow selecting a class by setting skb->priority to a valid classid;
194 note that nfmark can be used too by attaching the fw filter with no
195 rules in it */
196 if (skb->priority == sch->handle)
197 return HTB_DIRECT; /* X:0 (direct flow) selected */
198 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
199 return cl;
200
201 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
202 tcf = q->filter_list;
203 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
204 #ifdef CONFIG_NET_CLS_ACT
205 switch (result) {
206 case TC_ACT_QUEUED:
207 case TC_ACT_STOLEN:
208 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
209 case TC_ACT_SHOT:
210 return NULL;
211 }
212 #endif
213 if ((cl = (void *)res.class) == NULL) {
214 if (res.classid == sch->handle)
215 return HTB_DIRECT; /* X:0 (direct flow) */
216 if ((cl = htb_find(res.classid, sch)) == NULL)
217 break; /* filter selected invalid classid */
218 }
219 if (!cl->level)
220 return cl; /* we hit leaf; return it */
221
222 /* we have got inner class; apply inner filter chain */
223 tcf = cl->filter_list;
224 }
225 /* classification failed; try to use default class */
226 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
227 if (!cl || cl->level)
228 return HTB_DIRECT; /* bad default .. this is safe bet */
229 return cl;
230 }
231
232 /**
233 * htb_add_to_id_tree - adds class to the round robin list
234 *
235 * The routine adds the class to the list (actually a tree) sorted by classid.
236 * Make sure that the class is not already on such a list for the given prio.
237 */
238 static void htb_add_to_id_tree(struct rb_root *root,
239 struct htb_class *cl, int prio)
240 {
241 struct rb_node **p = &root->rb_node, *parent = NULL;
242
243 while (*p) {
244 struct htb_class *c;
245 parent = *p;
246 c = rb_entry(parent, struct htb_class, node[prio]);
247
248 if (cl->common.classid > c->common.classid)
249 p = &parent->rb_right;
250 else
251 p = &parent->rb_left;
252 }
253 rb_link_node(&cl->node[prio], parent, p);
254 rb_insert_color(&cl->node[prio], root);
255 }
256
257 /**
258 * htb_add_to_wait_tree - adds class to the event queue with delay
259 *
260 * The class is added to the priority event queue to indicate that the class
261 * will change its mode when time cl->pq_key is reached. Make sure that the
262 * class is not already in the queue.
263 */
264 static void htb_add_to_wait_tree(struct htb_sched *q,
265 struct htb_class *cl, long delay)
266 {
267 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
268
269 cl->pq_key = q->now + delay;
270 if (cl->pq_key == q->now)
271 cl->pq_key++;
272
273 /* update the nearest event cache */
274 if (q->near_ev_cache[cl->level] > cl->pq_key)
275 q->near_ev_cache[cl->level] = cl->pq_key;
276
277 while (*p) {
278 struct htb_class *c;
279 parent = *p;
280 c = rb_entry(parent, struct htb_class, pq_node);
281 if (cl->pq_key >= c->pq_key)
282 p = &parent->rb_right;
283 else
284 p = &parent->rb_left;
285 }
286 rb_link_node(&cl->pq_node, parent, p);
287 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
288 }
289
290 /**
291 * htb_next_rb_node - finds the next node in the binary tree
292 *
293 * When we are past the last key, *n becomes NULL.
294 * The average complexity is 2 steps per call.
295 */
296 static inline void htb_next_rb_node(struct rb_node **n)
297 {
298 *n = rb_next(*n);
299 }
300
301 /**
302 * htb_add_class_to_row - add class to its row
303 *
304 * The class is added to row at priorities marked in mask.
305 * It does nothing if mask == 0.
306 */
307 static inline void htb_add_class_to_row(struct htb_sched *q,
308 struct htb_class *cl, int mask)
309 {
310 q->row_mask[cl->level] |= mask;
311 while (mask) {
312 int prio = ffz(~mask);
313 mask &= ~(1 << prio);
314 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
315 }
316 }
317
318 /* If this triggers, it is a bug in this code, but it need not be fatal */
319 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
320 {
321 if (RB_EMPTY_NODE(rb)) {
322 WARN_ON(1);
323 } else {
324 rb_erase(rb, root);
325 RB_CLEAR_NODE(rb);
326 }
327 }
328
329
330 /**
331 * htb_remove_class_from_row - removes class from its row
332 *
333 * The class is removed from row at priorities marked in mask.
334 * It does nothing if mask == 0.
335 */
336 static inline void htb_remove_class_from_row(struct htb_sched *q,
337 struct htb_class *cl, int mask)
338 {
339 int m = 0;
340
341 while (mask) {
342 int prio = ffz(~mask);
343
344 mask &= ~(1 << prio);
345 if (q->ptr[cl->level][prio] == cl->node + prio)
346 htb_next_rb_node(q->ptr[cl->level] + prio);
347
348 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
349 if (!q->row[cl->level][prio].rb_node)
350 m |= 1 << prio;
351 }
352 q->row_mask[cl->level] &= ~m;
353 }
354
355 /**
356 * htb_activate_prios - creates an active class's feed chain
357 *
358 * The class is connected to its ancestors and/or the appropriate rows
359 * for the priorities it participates in. cl->cmode must be the new
360 * (activated) mode. It does nothing if cl->prio_activity == 0.
361 */
362 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
363 {
364 struct htb_class *p = cl->parent;
365 long m, mask = cl->prio_activity;
366
367 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
368 m = mask;
369 while (m) {
370 int prio = ffz(~m);
371 m &= ~(1 << prio);
372
373 if (p->un.inner.feed[prio].rb_node)
374 /* parent already has its feed in use, so
375 reset the bit in mask as the parent is already ok */
376 mask &= ~(1 << prio);
377
378 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
379 }
380 p->prio_activity |= mask;
381 cl = p;
382 p = cl->parent;
383
384 }
385 if (cl->cmode == HTB_CAN_SEND && mask)
386 htb_add_class_to_row(q, cl, mask);
387 }
388
389 /**
390 * htb_deactivate_prios - remove class from feed chain
391 *
392 * cl->cmode must represent the old mode (before deactivation). It does
393 * nothing if cl->prio_activity == 0. The class is removed from all feed
394 * chains and rows.
395 */
396 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
397 {
398 struct htb_class *p = cl->parent;
399 long m, mask = cl->prio_activity;
400
401 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
402 m = mask;
403 mask = 0;
404 while (m) {
405 int prio = ffz(~m);
406 m &= ~(1 << prio);
407
408 if (p->un.inner.ptr[prio] == cl->node + prio) {
409 /* we are removing a child that the parent's feed
410 pointer points to - forget the pointer but remember
411 the classid */
412 p->un.inner.last_ptr_id[prio] = cl->common.classid;
413 p->un.inner.ptr[prio] = NULL;
414 }
415
416 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
417
418 if (!p->un.inner.feed[prio].rb_node)
419 mask |= 1 << prio;
420 }
421
422 p->prio_activity &= ~mask;
423 cl = p;
424 p = cl->parent;
425
426 }
427 if (cl->cmode == HTB_CAN_SEND && mask)
428 htb_remove_class_from_row(q, cl, mask);
429 }
430
431 static inline long htb_lowater(const struct htb_class *cl)
432 {
433 if (htb_hysteresis)
434 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
435 else
436 return 0;
437 }
438 static inline long htb_hiwater(const struct htb_class *cl)
439 {
440 if (htb_hysteresis)
441 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
442 else
443 return 0;
444 }
445
446
447 /**
448 * htb_class_mode - computes and returns current class mode
449 *
450 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
451 * is not HTB_CAN_SEND then *diff is updated to the time difference
452 * from now to the time when cl will change its state.
453 * It is also worth noting that the class mode doesn't change simply
454 * at cl->{c,}tokens == 0; rather, there can be a hysteresis in the
455 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
456 * mode transitions per time unit. The speed gain is about 1/6.
457 */
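/* Editor's worked example (illustrative numbers, not part of the original
 * source): with hysteresis off both watermarks are 0, so at time t_c+diff a
 * class whose ctokens+diff is -5000 is HTB_CANT_SEND (over ceil), one whose
 * tokens+diff is +200 is HTB_CAN_SEND, and one with ctokens+diff = +300 but
 * tokens+diff = -100 is HTB_MAY_BORROW; in the two non-CAN_SEND cases *diff
 * is overwritten with the token deficit, i.e. roughly how long the class
 * has to wait before the next mode check. */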
458 static inline enum htb_cmode
459 htb_class_mode(struct htb_class *cl, long *diff)
460 {
461 long toks;
462
463 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
464 *diff = -toks;
465 return HTB_CANT_SEND;
466 }
467
468 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
469 return HTB_CAN_SEND;
470
471 *diff = -toks;
472 return HTB_MAY_BORROW;
473 }
474
475 /**
476 * htb_change_class_mode - changes a class's mode
477 *
478 * This should be the only way to change a class's mode under normal
479 * circumstances. The routine will update the feed list linkage, change the
480 * mode and add the class to the wait event queue if appropriate. The new mode
481 * should be different from the old one and cl->pq_key has to be valid if
482 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
483 */
484 static void
485 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
486 {
487 enum htb_cmode new_mode = htb_class_mode(cl, diff);
488
489 if (new_mode == cl->cmode)
490 return;
491
492 if (cl->prio_activity) { /* not necessary: speed optimization */
493 if (cl->cmode != HTB_CANT_SEND)
494 htb_deactivate_prios(q, cl);
495 cl->cmode = new_mode;
496 if (new_mode != HTB_CANT_SEND)
497 htb_activate_prios(q, cl);
498 } else
499 cl->cmode = new_mode;
500 }
501
502 /**
503 * htb_activate - inserts leaf cl into appropriate active feeds
504 *
505 * The routine learns the (new) priority of the leaf and activates the feed
506 * chain for that prio. It can safely be called on an already active leaf.
507 * It also adds the leaf to the drop list.
508 */
509 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
510 {
511 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
512
513 if (!cl->prio_activity) {
514 cl->prio_activity = 1 << cl->prio;
515 htb_activate_prios(q, cl);
516 list_add_tail(&cl->un.leaf.drop_list,
517 q->drops + cl->prio);
518 }
519 }
520
521 /**
522 * htb_deactivate - remove leaf cl from active feeds
523 *
524 * Make sure that the leaf is active. In other words, it can't be called
525 * with a non-active leaf. It also removes the class from the drop list.
526 */
527 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
528 {
529 WARN_ON(!cl->prio_activity);
530
531 htb_deactivate_prios(q, cl);
532 cl->prio_activity = 0;
533 list_del_init(&cl->un.leaf.drop_list);
534 }
535
536 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
537 {
538 int uninitialized_var(ret);
539 struct htb_sched *q = qdisc_priv(sch);
540 struct htb_class *cl = htb_classify(skb, sch, &ret);
541
542 if (cl == HTB_DIRECT) {
543 /* enqueue to helper queue */
544 if (q->direct_queue.qlen < q->direct_qlen) {
545 __skb_queue_tail(&q->direct_queue, skb);
546 q->direct_pkts++;
547 } else {
548 kfree_skb(skb);
549 sch->qstats.drops++;
550 return NET_XMIT_DROP;
551 }
552 #ifdef CONFIG_NET_CLS_ACT
553 } else if (!cl) {
554 if (ret & __NET_XMIT_BYPASS)
555 sch->qstats.drops++;
556 kfree_skb(skb);
557 return ret;
558 #endif
559 } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
560 if (net_xmit_drop_count(ret)) {
561 sch->qstats.drops++;
562 cl->qstats.drops++;
563 }
564 return ret;
565 } else {
566 cl->bstats.packets +=
567 skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
568 cl->bstats.bytes += qdisc_pkt_len(skb);
569 htb_activate(q, cl);
570 }
571
572 sch->q.qlen++;
573 sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
574 sch->bstats.bytes += qdisc_pkt_len(skb);
575 return NET_XMIT_SUCCESS;
576 }
577
578 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
579 {
580 long toks = diff + cl->tokens;
581
582 if (toks > cl->buffer)
583 toks = cl->buffer;
584 toks -= (long) qdisc_l2t(cl->rate, bytes);
585 if (toks <= -cl->mbuffer)
586 toks = 1 - cl->mbuffer;
587
588 cl->tokens = toks;
589 }
590
591 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
592 {
593 long toks = diff + cl->ctokens;
594
595 if (toks > cl->cbuffer)
596 toks = cl->cbuffer;
597 toks -= (long) qdisc_l2t(cl->ceil, bytes);
598 if (toks <= -cl->mbuffer)
599 toks = 1 - cl->mbuffer;
600
601 cl->ctokens = toks;
602 }
603
604 /**
605 * htb_charge_class - charges amount "bytes" to leaf and ancestors
606 *
607 * The routine assumes that a packet "bytes" long was dequeued from leaf cl
608 * borrowing from "level". It accounts the bytes to the ceil leaky bucket for
609 * the leaf and all ancestors and to the rate bucket for ancestors at levels
610 * "level" and higher. It also handles a possible change of mode resulting
611 * from the update. Note that the mode can also increase here (MAY_BORROW to
612 * CAN_SEND) because we can use a more precise clock than the event queue here.
613 * In such a case we remove the class from the event queue first.
614 */
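/* Editor's note (restating the above for illustration, not part of the
 * original source): if the skb went out thanks to an ancestor at row
 * "level", the leaf and every ancestor below that level only pay ceil
 * tokens (their rate tokens are merely refreshed by the elapsed time and
 * counted as xstats.borrows), while the class at "level" and everything
 * above it pays into both the rate and the ceil bucket (counted as
 * xstats.lends at "level" itself). */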
615 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
616 int level, struct sk_buff *skb)
617 {
618 int bytes = qdisc_pkt_len(skb);
619 enum htb_cmode old_mode;
620 long diff;
621
622 while (cl) {
623 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
624 if (cl->level >= level) {
625 if (cl->level == level)
626 cl->xstats.lends++;
627 htb_accnt_tokens(cl, bytes, diff);
628 } else {
629 cl->xstats.borrows++;
630 cl->tokens += diff; /* we moved t_c; update tokens */
631 }
632 htb_accnt_ctokens(cl, bytes, diff);
633 cl->t_c = q->now;
634
635 old_mode = cl->cmode;
636 diff = 0;
637 htb_change_class_mode(q, cl, &diff);
638 if (old_mode != cl->cmode) {
639 if (old_mode != HTB_CAN_SEND)
640 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
641 if (cl->cmode != HTB_CAN_SEND)
642 htb_add_to_wait_tree(q, cl, diff);
643 }
644
645 /* update byte stats except for leaves which are already updated */
646 if (cl->level) {
647 cl->bstats.bytes += bytes;
648 cl->bstats.packets += skb_is_gso(skb)?
649 skb_shinfo(skb)->gso_segs:1;
650 }
651 cl = cl->parent;
652 }
653 }
654
655 /**
656 * htb_do_events - make mode changes to classes at the level
657 *
658 * Scans the event queue for pending events and applies them. Returns the time
659 * of the next pending event (0 if there is no event in the pq).
660 * Note: only events with cl->pq_key <= q->now are applied.
661 */
662 static psched_time_t htb_do_events(struct htb_sched *q, int level,
663 unsigned long start)
664 {
665 /* don't run for longer than 2 jiffies; 2 is used instead of
666 1 to simplify things when jiffy is going to be incremented
667 too soon */
668 unsigned long stop_at = start + 2;
669 while (time_before(jiffies, stop_at)) {
670 struct htb_class *cl;
671 long diff;
672 struct rb_node *p = rb_first(&q->wait_pq[level]);
673
674 if (!p)
675 return 0;
676
677 cl = rb_entry(p, struct htb_class, pq_node);
678 if (cl->pq_key > q->now)
679 return cl->pq_key;
680
681 htb_safe_rb_erase(p, q->wait_pq + level);
682 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
683 htb_change_class_mode(q, cl, &diff);
684 if (cl->cmode != HTB_CAN_SEND)
685 htb_add_to_wait_tree(q, cl, diff);
686 }
687 /* too much load - let's continue on the next jiffy (including above) */
688 return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ;
689 }
690
691 /* Returns class->node+prio from the id-tree where the class's id is >= id,
692 or NULL if no such node exists. */
693 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
694 u32 id)
695 {
696 struct rb_node *r = NULL;
697 while (n) {
698 struct htb_class *cl =
699 rb_entry(n, struct htb_class, node[prio]);
700
701 if (id > cl->common.classid) {
702 n = n->rb_right;
703 } else if (id < cl->common.classid) {
704 r = n;
705 n = n->rb_left;
706 } else {
707 return n;
708 }
709 }
710 return r;
711 }
712
713 /**
714 * htb_lookup_leaf - returns the next leaf class in DRR order
715 *
716 * Finds the leaf the current feed pointer points to.
717 */
718 static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
719 struct rb_node **pptr, u32 * pid)
720 {
721 int i;
722 struct {
723 struct rb_node *root;
724 struct rb_node **pptr;
725 u32 *pid;
726 } stk[TC_HTB_MAXDEPTH], *sp = stk;
727
728 BUG_ON(!tree->rb_node);
729 sp->root = tree->rb_node;
730 sp->pptr = pptr;
731 sp->pid = pid;
732
733 for (i = 0; i < 65535; i++) {
734 if (!*sp->pptr && *sp->pid) {
735 /* ptr was invalidated but id is valid - try to recover
736 the original or next ptr */
737 *sp->pptr =
738 htb_id_find_next_upper(prio, sp->root, *sp->pid);
739 }
740 *sp->pid = 0; /* ptr is valid now, so remove this hint as it
741 can become out of date quickly */
742 if (!*sp->pptr) { /* we are at right end; rewind & go up */
743 *sp->pptr = sp->root;
744 while ((*sp->pptr)->rb_left)
745 *sp->pptr = (*sp->pptr)->rb_left;
746 if (sp > stk) {
747 sp--;
748 if (!*sp->pptr) {
749 WARN_ON(1);
750 return NULL;
751 }
752 htb_next_rb_node(sp->pptr);
753 }
754 } else {
755 struct htb_class *cl;
756 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
757 if (!cl->level)
758 return cl;
759 (++sp)->root = cl->un.inner.feed[prio].rb_node;
760 sp->pptr = cl->un.inner.ptr + prio;
761 sp->pid = cl->un.inner.last_ptr_id + prio;
762 }
763 }
764 WARN_ON(1);
765 return NULL;
766 }
767
768 /* dequeues a packet at the given priority and level; call only if
769 you are sure that there is an active class at prio/level */
770 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
771 int level)
772 {
773 struct sk_buff *skb = NULL;
774 struct htb_class *cl, *start;
775 /* look initial class up in the row */
776 start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
777 q->ptr[level] + prio,
778 q->last_ptr_id[level] + prio);
779
780 do {
781 next:
782 if (unlikely(!cl))
783 return NULL;
784
785 /* class can be empty - it is unlikely but can be true if the leaf
786 qdisc drops packets in its enqueue routine or if someone used the
787 graft operation on the leaf since the last dequeue;
788 simply deactivate and skip such a class */
789 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
790 struct htb_class *next;
791 htb_deactivate(q, cl);
792
793 /* row/level might become empty */
794 if ((q->row_mask[level] & (1 << prio)) == 0)
795 return NULL;
796
797 next = htb_lookup_leaf(q->row[level] + prio,
798 prio, q->ptr[level] + prio,
799 q->last_ptr_id[level] + prio);
800
801 if (cl == start) /* fix start if we just deleted it */
802 start = next;
803 cl = next;
804 goto next;
805 }
806
807 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
808 if (likely(skb != NULL))
809 break;
810
811 qdisc_warn_nonwc("htb", cl->un.leaf.q);
812 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
813 ptr[0]) + prio);
814 cl = htb_lookup_leaf(q->row[level] + prio, prio,
815 q->ptr[level] + prio,
816 q->last_ptr_id[level] + prio);
817
818 } while (cl != start);
819
820 if (likely(skb != NULL)) {
821 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
822 if (cl->un.leaf.deficit[level] < 0) {
823 cl->un.leaf.deficit[level] += cl->quantum;
824 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
825 ptr[0]) + prio);
826 }
827 /* this used to be after charge_class but this ordering
828 gives us slightly better performance */
829 if (!cl->un.leaf.q->q.qlen)
830 htb_deactivate(q, cl);
831 htb_charge_class(q, cl, level, skb);
832 }
833 return skb;
834 }
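/* Editor's DRR note (illustrative, not part of the original source): the
 * deficit handling above means a leaf keeps the row/feed pointer until it
 * has sent roughly cl->quantum bytes across successive dequeues, after
 * which the pointer advances to the next active leaf at the same prio -
 * classic deficit round robin, with the quantum derived from rate/r2q (or
 * given explicitly) in htb_change_class(). */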
835
836 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
837 {
838 struct sk_buff *skb = NULL;
839 struct htb_sched *q = qdisc_priv(sch);
840 int level;
841 psched_time_t next_event;
842 unsigned long start_at;
843
844 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
845 skb = __skb_dequeue(&q->direct_queue);
846 if (skb != NULL) {
847 sch->flags &= ~TCQ_F_THROTTLED;
848 sch->q.qlen--;
849 return skb;
850 }
851
852 if (!sch->q.qlen)
853 goto fin;
854 q->now = psched_get_time();
855 start_at = jiffies;
856
857 next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
858
859 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
860 /* common case optimization - skip event handler quickly */
861 int m;
862 psched_time_t event;
863
864 if (q->now >= q->near_ev_cache[level]) {
865 event = htb_do_events(q, level, start_at);
866 if (!event)
867 event = q->now + PSCHED_TICKS_PER_SEC;
868 q->near_ev_cache[level] = event;
869 } else
870 event = q->near_ev_cache[level];
871
872 if (next_event > event)
873 next_event = event;
874
875 m = ~q->row_mask[level];
876 while (m != (int)(-1)) {
877 int prio = ffz(m);
878 m |= 1 << prio;
879 skb = htb_dequeue_tree(q, prio, level);
880 if (likely(skb != NULL)) {
881 sch->q.qlen--;
882 sch->flags &= ~TCQ_F_THROTTLED;
883 goto fin;
884 }
885 }
886 }
887 sch->qstats.overlimits++;
888 qdisc_watchdog_schedule(&q->watchdog, next_event);
889 fin:
890 return skb;
891 }
892
893 /* try to drop from each class (by prio) until one succeeds */
894 static unsigned int htb_drop(struct Qdisc *sch)
895 {
896 struct htb_sched *q = qdisc_priv(sch);
897 int prio;
898
899 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
900 struct list_head *p;
901 list_for_each(p, q->drops + prio) {
902 struct htb_class *cl = list_entry(p, struct htb_class,
903 un.leaf.drop_list);
904 unsigned int len;
905 if (cl->un.leaf.q->ops->drop &&
906 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
907 sch->q.qlen--;
908 if (!cl->un.leaf.q->q.qlen)
909 htb_deactivate(q, cl);
910 return len;
911 }
912 }
913 }
914 return 0;
915 }
916
917 /* reset all classes */
918 /* always called under BH & queue lock */
919 static void htb_reset(struct Qdisc *sch)
920 {
921 struct htb_sched *q = qdisc_priv(sch);
922 struct htb_class *cl;
923 struct hlist_node *n;
924 unsigned int i;
925
926 for (i = 0; i < q->clhash.hashsize; i++) {
927 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
928 if (cl->level)
929 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
930 else {
931 if (cl->un.leaf.q)
932 qdisc_reset(cl->un.leaf.q);
933 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
934 }
935 cl->prio_activity = 0;
936 cl->cmode = HTB_CAN_SEND;
937
938 }
939 }
940 qdisc_watchdog_cancel(&q->watchdog);
941 __skb_queue_purge(&q->direct_queue);
942 sch->q.qlen = 0;
943 memset(q->row, 0, sizeof(q->row));
944 memset(q->row_mask, 0, sizeof(q->row_mask));
945 memset(q->wait_pq, 0, sizeof(q->wait_pq));
946 memset(q->ptr, 0, sizeof(q->ptr));
947 for (i = 0; i < TC_HTB_NUMPRIO; i++)
948 INIT_LIST_HEAD(q->drops + i);
949 }
950
951 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
952 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
953 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
954 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
955 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
956 };
957
958 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
959 {
960 struct htb_sched *q = qdisc_priv(sch);
961 struct nlattr *tb[TCA_HTB_INIT + 1];
962 struct tc_htb_glob *gopt;
963 int err;
964 int i;
965
966 if (!opt)
967 return -EINVAL;
968
969 err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
970 if (err < 0)
971 return err;
972
973 if (tb[TCA_HTB_INIT] == NULL) {
974 printk(KERN_ERR "HTB: hey, probably you have a bad tc tool?\n");
975 return -EINVAL;
976 }
977 gopt = nla_data(tb[TCA_HTB_INIT]);
978 if (gopt->version != HTB_VER >> 16) {
979 printk(KERN_ERR
980 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
981 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
982 return -EINVAL;
983 }
984
985 err = qdisc_class_hash_init(&q->clhash);
986 if (err < 0)
987 return err;
988 for (i = 0; i < TC_HTB_NUMPRIO; i++)
989 INIT_LIST_HEAD(q->drops + i);
990
991 qdisc_watchdog_init(&q->watchdog, sch);
992 skb_queue_head_init(&q->direct_queue);
993
994 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
995 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
996 q->direct_qlen = 2;
997
998 if ((q->rate2quantum = gopt->rate2quantum) < 1)
999 q->rate2quantum = 1;
1000 q->defcls = gopt->defcls;
1001
1002 return 0;
1003 }
1004
1005 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1006 {
1007 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1008 struct htb_sched *q = qdisc_priv(sch);
1009 struct nlattr *nest;
1010 struct tc_htb_glob gopt;
1011
1012 spin_lock_bh(root_lock);
1013
1014 gopt.direct_pkts = q->direct_pkts;
1015 gopt.version = HTB_VER;
1016 gopt.rate2quantum = q->rate2quantum;
1017 gopt.defcls = q->defcls;
1018 gopt.debug = 0;
1019
1020 nest = nla_nest_start(skb, TCA_OPTIONS);
1021 if (nest == NULL)
1022 goto nla_put_failure;
1023 NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1024 nla_nest_end(skb, nest);
1025
1026 spin_unlock_bh(root_lock);
1027 return skb->len;
1028
1029 nla_put_failure:
1030 spin_unlock_bh(root_lock);
1031 nla_nest_cancel(skb, nest);
1032 return -1;
1033 }
1034
1035 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1036 struct sk_buff *skb, struct tcmsg *tcm)
1037 {
1038 struct htb_class *cl = (struct htb_class *)arg;
1039 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1040 struct nlattr *nest;
1041 struct tc_htb_opt opt;
1042
1043 spin_lock_bh(root_lock);
1044 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1045 tcm->tcm_handle = cl->common.classid;
1046 if (!cl->level && cl->un.leaf.q)
1047 tcm->tcm_info = cl->un.leaf.q->handle;
1048
1049 nest = nla_nest_start(skb, TCA_OPTIONS);
1050 if (nest == NULL)
1051 goto nla_put_failure;
1052
1053 memset(&opt, 0, sizeof(opt));
1054
1055 opt.rate = cl->rate->rate;
1056 opt.buffer = cl->buffer;
1057 opt.ceil = cl->ceil->rate;
1058 opt.cbuffer = cl->cbuffer;
1059 opt.quantum = cl->quantum;
1060 opt.prio = cl->prio;
1061 opt.level = cl->level;
1062 NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1063
1064 nla_nest_end(skb, nest);
1065 spin_unlock_bh(root_lock);
1066 return skb->len;
1067
1068 nla_put_failure:
1069 spin_unlock_bh(root_lock);
1070 nla_nest_cancel(skb, nest);
1071 return -1;
1072 }
1073
1074 static int
1075 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1076 {
1077 struct htb_class *cl = (struct htb_class *)arg;
1078
1079 if (!cl->level && cl->un.leaf.q)
1080 cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1081 cl->xstats.tokens = cl->tokens;
1082 cl->xstats.ctokens = cl->ctokens;
1083
1084 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1085 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1086 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1087 return -1;
1088
1089 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1090 }
1091
1092 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1093 struct Qdisc **old)
1094 {
1095 struct htb_class *cl = (struct htb_class *)arg;
1096
1097 if (cl && !cl->level) {
1098 if (new == NULL &&
1099 (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1100 &pfifo_qdisc_ops,
1101 cl->common.classid))
1102 == NULL)
1103 return -ENOBUFS;
1104 sch_tree_lock(sch);
1105 *old = cl->un.leaf.q;
1106 cl->un.leaf.q = new;
1107 if (*old != NULL) {
1108 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1109 qdisc_reset(*old);
1110 }
1111 sch_tree_unlock(sch);
1112 return 0;
1113 }
1114 return -ENOENT;
1115 }
1116
1117 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1118 {
1119 struct htb_class *cl = (struct htb_class *)arg;
1120 return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1121 }
1122
1123 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1124 {
1125 struct htb_class *cl = (struct htb_class *)arg;
1126
1127 if (cl->un.leaf.q->q.qlen == 0)
1128 htb_deactivate(qdisc_priv(sch), cl);
1129 }
1130
1131 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1132 {
1133 struct htb_class *cl = htb_find(classid, sch);
1134 if (cl)
1135 cl->refcnt++;
1136 return (unsigned long)cl;
1137 }
1138
1139 static inline int htb_parent_last_child(struct htb_class *cl)
1140 {
1141 if (!cl->parent)
1142 /* the root class */
1143 return 0;
1144 if (cl->parent->children > 1)
1145 /* not the last child */
1146 return 0;
1147 return 1;
1148 }
1149
1150 static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1151 struct Qdisc *new_q)
1152 {
1153 struct htb_class *parent = cl->parent;
1154
1155 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1156
1157 if (parent->cmode != HTB_CAN_SEND)
1158 htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1159
1160 parent->level = 0;
1161 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1162 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1163 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1164 parent->tokens = parent->buffer;
1165 parent->ctokens = parent->cbuffer;
1166 parent->t_c = psched_get_time();
1167 parent->cmode = HTB_CAN_SEND;
1168 }
1169
1170 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1171 {
1172 if (!cl->level) {
1173 WARN_ON(!cl->un.leaf.q);
1174 qdisc_destroy(cl->un.leaf.q);
1175 }
1176 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1177 qdisc_put_rtab(cl->rate);
1178 qdisc_put_rtab(cl->ceil);
1179
1180 tcf_destroy_chain(&cl->filter_list);
1181 kfree(cl);
1182 }
1183
1184 /* always called under BH & queue lock */
1185 static void htb_destroy(struct Qdisc *sch)
1186 {
1187 struct htb_sched *q = qdisc_priv(sch);
1188 struct hlist_node *n, *next;
1189 struct htb_class *cl;
1190 unsigned int i;
1191
1192 qdisc_watchdog_cancel(&q->watchdog);
1193 /* This line used to be after the htb_destroy_class call below
1194 and surprisingly it worked in 2.4. But it must precede it
1195 because a filter needs its target class alive to be able to call
1196 unbind_filter on it (without an Oops). */
1197 tcf_destroy_chain(&q->filter_list);
1198
1199 for (i = 0; i < q->clhash.hashsize; i++) {
1200 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1201 tcf_destroy_chain(&cl->filter_list);
1202 }
1203 for (i = 0; i < q->clhash.hashsize; i++) {
1204 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1205 common.hnode)
1206 htb_destroy_class(sch, cl);
1207 }
1208 qdisc_class_hash_destroy(&q->clhash);
1209 __skb_queue_purge(&q->direct_queue);
1210 }
1211
1212 static int htb_delete(struct Qdisc *sch, unsigned long arg)
1213 {
1214 struct htb_sched *q = qdisc_priv(sch);
1215 struct htb_class *cl = (struct htb_class *)arg;
1216 unsigned int qlen;
1217 struct Qdisc *new_q = NULL;
1218 int last_child = 0;
1219
1220 // TODO: why don't we allow deleting a subtree? references? does the
1221 // tc subsystem guarantee us that in htb_destroy it holds no class
1222 // refs so that we can remove children safely there?
1223 if (cl->children || cl->filter_cnt)
1224 return -EBUSY;
1225
1226 if (!cl->level && htb_parent_last_child(cl)) {
1227 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1228 &pfifo_qdisc_ops,
1229 cl->parent->common.classid);
1230 last_child = 1;
1231 }
1232
1233 sch_tree_lock(sch);
1234
1235 if (!cl->level) {
1236 qlen = cl->un.leaf.q->q.qlen;
1237 qdisc_reset(cl->un.leaf.q);
1238 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1239 }
1240
1241 /* delete from hash and active; remainder in destroy_class */
1242 qdisc_class_hash_remove(&q->clhash, &cl->common);
1243 if (cl->parent)
1244 cl->parent->children--;
1245
1246 if (cl->prio_activity)
1247 htb_deactivate(q, cl);
1248
1249 if (cl->cmode != HTB_CAN_SEND)
1250 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1251
1252 if (last_child)
1253 htb_parent_to_leaf(q, cl, new_q);
1254
1255 if (--cl->refcnt == 0)
1256 htb_destroy_class(sch, cl);
1257
1258 sch_tree_unlock(sch);
1259 return 0;
1260 }
1261
1262 static void htb_put(struct Qdisc *sch, unsigned long arg)
1263 {
1264 struct htb_class *cl = (struct htb_class *)arg;
1265
1266 if (--cl->refcnt == 0)
1267 htb_destroy_class(sch, cl);
1268 }
1269
1270 static int htb_change_class(struct Qdisc *sch, u32 classid,
1271 u32 parentid, struct nlattr **tca,
1272 unsigned long *arg)
1273 {
1274 int err = -EINVAL;
1275 struct htb_sched *q = qdisc_priv(sch);
1276 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1277 struct nlattr *opt = tca[TCA_OPTIONS];
1278 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1279 struct nlattr *tb[TCA_HTB_RTAB + 1];
1280 struct tc_htb_opt *hopt;
1281
1282 /* extract all subattrs from opt attr */
1283 if (!opt)
1284 goto failure;
1285
1286 err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1287 if (err < 0)
1288 goto failure;
1289
1290 err = -EINVAL;
1291 if (tb[TCA_HTB_PARMS] == NULL)
1292 goto failure;
1293
1294 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1295
1296 hopt = nla_data(tb[TCA_HTB_PARMS]);
1297
1298 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1299 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1300 if (!rtab || !ctab)
1301 goto failure;
1302
1303 if (!cl) { /* new class */
1304 struct Qdisc *new_q;
1305 int prio;
1306 struct {
1307 struct nlattr nla;
1308 struct gnet_estimator opt;
1309 } est = {
1310 .nla = {
1311 .nla_len = nla_attr_size(sizeof(est.opt)),
1312 .nla_type = TCA_RATE,
1313 },
1314 .opt = {
1315 /* 4s interval, 16s averaging constant */
1316 .interval = 2,
1317 .ewma_log = 2,
1318 },
1319 };
1320
1321 /* check for valid classid */
1322 if (!classid || TC_H_MAJ(classid ^ sch->handle)
1323 || htb_find(classid, sch))
1324 goto failure;
1325
1326 /* check maximal depth */
1327 if (parent && parent->parent && parent->parent->level < 2) {
1328 printk(KERN_ERR "htb: tree is too deep\n");
1329 goto failure;
1330 }
1331 err = -ENOBUFS;
1332 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1333 goto failure;
1334
1335 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1336 qdisc_root_sleeping_lock(sch),
1337 tca[TCA_RATE] ? : &est.nla);
1338 if (err) {
1339 kfree(cl);
1340 goto failure;
1341 }
1342
1343 cl->refcnt = 1;
1344 cl->children = 0;
1345 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1346 RB_CLEAR_NODE(&cl->pq_node);
1347
1348 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1349 RB_CLEAR_NODE(&cl->node[prio]);
1350
1351 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1352 which can't be used inside of sch_tree_lock
1353 -- thanks to Karlis Peisenieks */
1354 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1355 &pfifo_qdisc_ops, classid);
1356 sch_tree_lock(sch);
1357 if (parent && !parent->level) {
1358 unsigned int qlen = parent->un.leaf.q->q.qlen;
1359
1360 /* turn parent into inner node */
1361 qdisc_reset(parent->un.leaf.q);
1362 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1363 qdisc_destroy(parent->un.leaf.q);
1364 if (parent->prio_activity)
1365 htb_deactivate(q, parent);
1366
1367 /* remove from evt list because of level change */
1368 if (parent->cmode != HTB_CAN_SEND) {
1369 htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1370 parent->cmode = HTB_CAN_SEND;
1371 }
1372 parent->level = (parent->parent ? parent->parent->level
1373 : TC_HTB_MAXDEPTH) - 1;
1374 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1375 }
1376 /* leaf (we) needs elementary qdisc */
1377 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1378
1379 cl->common.classid = classid;
1380 cl->parent = parent;
1381
1382 /* set class to be in HTB_CAN_SEND state */
1383 cl->tokens = hopt->buffer;
1384 cl->ctokens = hopt->cbuffer;
1385 cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
1386 cl->t_c = psched_get_time();
1387 cl->cmode = HTB_CAN_SEND;
1388
1389 /* attach to the hash list and parent's family */
1390 qdisc_class_hash_insert(&q->clhash, &cl->common);
1391 if (parent)
1392 parent->children++;
1393 } else {
1394 if (tca[TCA_RATE]) {
1395 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1396 qdisc_root_sleeping_lock(sch),
1397 tca[TCA_RATE]);
1398 if (err)
1399 return err;
1400 }
1401 sch_tree_lock(sch);
1402 }
1403
1404 /* there used to be a nasty bug here: we have to check that the node
1405 is really a leaf before changing cl->un.leaf! */
1406 if (!cl->level) {
1407 cl->quantum = rtab->rate.rate / q->rate2quantum;
1408 if (!hopt->quantum && cl->quantum < 1000) {
1409 printk(KERN_WARNING
1410 "HTB: quantum of class %X is small. Consider r2q change.\n",
1411 cl->common.classid);
1412 cl->quantum = 1000;
1413 }
1414 if (!hopt->quantum && cl->quantum > 200000) {
1415 printk(KERN_WARNING
1416 "HTB: quantum of class %X is big. Consider r2q change.\n",
1417 cl->common.classid);
1418 cl->quantum = 200000;
1419 }
1420 if (hopt->quantum)
1421 cl->quantum = hopt->quantum;
1422 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1423 cl->prio = TC_HTB_NUMPRIO - 1;
1424 }
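/* Editor's worked example (illustrative, not part of the original source):
 * rtab->rate.rate is in bytes per second, so a 100 Mbit class
 * (12500000 B/s) with the common r2q default of 10 yields a quantum of
 * 1250000 and trips the "is big" clamp above; picking a larger r2q or
 * passing an explicit quantum keeps the value inside the 1000..200000
 * window. */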
1425
1426 cl->buffer = hopt->buffer;
1427 cl->cbuffer = hopt->cbuffer;
1428 if (cl->rate)
1429 qdisc_put_rtab(cl->rate);
1430 cl->rate = rtab;
1431 if (cl->ceil)
1432 qdisc_put_rtab(cl->ceil);
1433 cl->ceil = ctab;
1434 sch_tree_unlock(sch);
1435
1436 qdisc_class_hash_grow(sch, &q->clhash);
1437
1438 *arg = (unsigned long)cl;
1439 return 0;
1440
1441 failure:
1442 if (rtab)
1443 qdisc_put_rtab(rtab);
1444 if (ctab)
1445 qdisc_put_rtab(ctab);
1446 return err;
1447 }
1448
1449 static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1450 {
1451 struct htb_sched *q = qdisc_priv(sch);
1452 struct htb_class *cl = (struct htb_class *)arg;
1453 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1454
1455 return fl;
1456 }
1457
1458 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1459 u32 classid)
1460 {
1461 struct htb_class *cl = htb_find(classid, sch);
1462
1463 /*if (cl && !cl->level) return 0;
1464 The line above used to be there to prevent attaching filters to
1465 leaves. But at least the tc_index filter uses this just to get the class
1466 for other reasons, so we have to allow it.
1467 ----
1468 19.6.2002 As Werner explained it is ok - bind filter is just
1469 another way to "lock" the class - unlike "get", this lock can
1470 be broken by the class during destroy, IIUC.
1471 */
1472 if (cl)
1473 cl->filter_cnt++;
1474 return (unsigned long)cl;
1475 }
1476
1477 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1478 {
1479 struct htb_class *cl = (struct htb_class *)arg;
1480
1481 if (cl)
1482 cl->filter_cnt--;
1483 }
1484
1485 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1486 {
1487 struct htb_sched *q = qdisc_priv(sch);
1488 struct htb_class *cl;
1489 struct hlist_node *n;
1490 unsigned int i;
1491
1492 if (arg->stop)
1493 return;
1494
1495 for (i = 0; i < q->clhash.hashsize; i++) {
1496 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1497 if (arg->count < arg->skip) {
1498 arg->count++;
1499 continue;
1500 }
1501 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1502 arg->stop = 1;
1503 return;
1504 }
1505 arg->count++;
1506 }
1507 }
1508 }
1509
1510 static const struct Qdisc_class_ops htb_class_ops = {
1511 .graft = htb_graft,
1512 .leaf = htb_leaf,
1513 .qlen_notify = htb_qlen_notify,
1514 .get = htb_get,
1515 .put = htb_put,
1516 .change = htb_change_class,
1517 .delete = htb_delete,
1518 .walk = htb_walk,
1519 .tcf_chain = htb_find_tcf,
1520 .bind_tcf = htb_bind_filter,
1521 .unbind_tcf = htb_unbind_filter,
1522 .dump = htb_dump_class,
1523 .dump_stats = htb_dump_class_stats,
1524 };
1525
1526 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1527 .next = NULL,
1528 .cl_ops = &htb_class_ops,
1529 .id = "htb",
1530 .priv_size = sizeof(struct htb_sched),
1531 .enqueue = htb_enqueue,
1532 .dequeue = htb_dequeue,
1533 .peek = qdisc_peek_dequeued,
1534 .drop = htb_drop,
1535 .init = htb_init,
1536 .reset = htb_reset,
1537 .destroy = htb_destroy,
1538 .change = NULL /* htb_change */,
1539 .dump = htb_dump,
1540 .owner = THIS_MODULE,
1541 };
1542
1543 static int __init htb_module_init(void)
1544 {
1545 return register_qdisc(&htb_qdisc_ops);
1546 }
1547 static void __exit htb_module_exit(void)
1548 {
1549 unregister_qdisc(&htb_qdisc_ops);
1550 }
1551
1552 module_init(htb_module_init)
1553 module_exit(htb_module_exit)
1554 MODULE_LICENSE("GPL");