1 /*
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Martin Devera, <devik@cdi.cz>
10 *
11 * Credits (in time order) for older HTB versions:
12 * Stef Coene <stef.coene@docum.org>
13 * HTB support at LARTC mailing list
14 * Ondrej Kraus, <krauso@barr.cz>
15 * found missing INIT_QDISC(htb)
16 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17 * helped a lot to locate nasty class stall bug
18 * Andi Kleen, Jamal Hadi, Bert Hubert
19 * code review and helpful comments on shaping
20 * Tomasz Wrona, <tw@eter.tym.pl>
21 * created test case so that I was able to fix nasty bug
22 * Wilfried Weissmann
23 * spotted bug in dequeue code and helped with fix
24 * Jiri Fojtasek
25 * fixed requeue routine
26 * and many others. thanks.
27 */
28 #include <linux/module.h>
29 #include <linux/moduleparam.h>
30 #include <linux/types.h>
31 #include <linux/kernel.h>
32 #include <linux/string.h>
33 #include <linux/errno.h>
34 #include <linux/skbuff.h>
35 #include <linux/list.h>
36 #include <linux/compiler.h>
37 #include <linux/rbtree.h>
38 #include <linux/workqueue.h>
39 #include <linux/slab.h>
40 #include <net/netlink.h>
41 #include <net/pkt_sched.h>
42
43 /* HTB algorithm.
44 Author: devik@cdi.cz
45 ========================================================================
46 HTB is like TBF with multiple classes. It is also similar to CBQ because
47 it allows assigning a priority to each class in the hierarchy.
48 In fact it is another implementation of Floyd's formal sharing.
49
50 Levels:
51 Each class is assigned a level. Leaves ALWAYS have level 0 and root
52 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
53 one less than their parent.
54 */
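/* Illustration (not from the original source, device name assumed): a typical
 * two-level hierarchy as configured from userspace with tc:
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 1mbit ceil 1mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 600kbit ceil 1mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 400kbit ceil 1mbit
 *
 * 1:10 and 1:20 are leaves at level 0 that may borrow unused rate from their
 * parent 1:1, which as a root class sits at level TC_HTB_MAXDEPTH-1.
 */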
55
56 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
57 #define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
58
59 #if HTB_VER >> 16 != TC_HTB_PROTOVER
60 #error "Mismatched sch_htb.c and pkt_sch.h"
61 #endif
62
63 /* Module parameter and sysfs export */
64 module_param (htb_hysteresis, int, 0640);
65 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
66
67 /* used internally to keep the status of a single class */
68 enum htb_cmode {
69 HTB_CANT_SEND, /* class can't send and can't borrow */
70 HTB_MAY_BORROW, /* class can't send but may borrow */
71 HTB_CAN_SEND /* class can send */
72 };
73
74 struct htb_rate_cfg {
75 u64 rate_bps;
76 u32 mult;
77 u32 shift;
78 };
79
80 /* interior & leaf nodes; props specific to leaves are marked L: */
81 struct htb_class {
82 struct Qdisc_class_common common;
83 /* general class parameters */
84 struct gnet_stats_basic_packed bstats;
85 struct gnet_stats_queue qstats;
86 struct gnet_stats_rate_est rate_est;
87 struct tc_htb_xstats xstats; /* our special stats */
88 int refcnt; /* usage count of this class */
89
90 /* topology */
91 int level; /* our level (see above) */
92 unsigned int children;
93 struct htb_class *parent; /* parent class */
94
95 int prio; /* these two are used only by leaves... */
96 int quantum; /* but stored for parent-to-leaf return */
97
98 union {
99 struct htb_class_leaf {
100 struct Qdisc *q;
101 int deficit[TC_HTB_MAXDEPTH];
102 struct list_head drop_list;
103 } leaf;
104 struct htb_class_inner {
105 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
106 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
107 /* When class changes from state 1->2 and disconnects from
108 * parent's feed, we lose the ptr value and start from the
109 * first child again. Here we store classid of the
110 * last valid ptr (used when ptr is NULL).
111 */
112 u32 last_ptr_id[TC_HTB_NUMPRIO];
113 } inner;
114 } un;
115 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
116 struct rb_node pq_node; /* node for event queue */
117 psched_time_t pq_key;
118
119 int prio_activity; /* for which prios are we active */
120 enum htb_cmode cmode; /* current mode of the class */
121
122 /* class attached filters */
123 struct tcf_proto *filter_list;
124 int filter_cnt;
125
126 /* token bucket parameters */
127 struct htb_rate_cfg rate;
128 struct htb_rate_cfg ceil;
129 s64 buffer, cbuffer; /* token bucket depth/rate */
130 psched_tdiff_t mbuffer; /* max wait time */
131 s64 tokens, ctokens; /* current number of tokens */
132 psched_time_t t_c; /* checkpoint time */
133 };
134
135 struct htb_sched {
136 struct Qdisc_class_hash clhash;
137 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
138
139 /* self list - roots of self generating tree */
140 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
141 int row_mask[TC_HTB_MAXDEPTH];
142 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
143 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
144
145 /* self wait list - roots of wait PQs per row */
146 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
147
148 /* time of nearest event per level (row) */
149 psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
150
151 int defcls; /* class where unclassified flows go to */
152
153 /* filters for qdisc itself */
154 struct tcf_proto *filter_list;
155
156 int rate2quantum; /* quant = rate / rate2quantum */
157 psched_time_t now; /* cached dequeue time */
158 struct qdisc_watchdog watchdog;
159
160 /* non shaped skbs; let them go directly thru */
161 struct sk_buff_head direct_queue;
162 int direct_qlen; /* max qlen of above */
163
164 long direct_pkts;
165
166 #define HTB_WARN_TOOMANYEVENTS 0x1
167 unsigned int warned; /* only one warning */
168 struct work_struct work;
169 };
170
171 static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len)
172 {
173 return ((u64)len * r->mult) >> r->shift;
174 }
175
176 static void htb_precompute_ratedata(struct htb_rate_cfg *r)
177 {
178 u64 factor;
179 u64 mult;
180 int shift;
181
182 r->shift = 0;
183 r->mult = 1;
184 /*
185 * Calibrate mult, shift so that token counting is accurate
186 * for smallest packet size (64 bytes). Token (time in ns) is
187 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will
188 * work as long as the smallest packet transfer time can be
189 * accurately represented in nanosec.
190 */
191 if (r->rate_bps > 0) {
192 /*
193 * Higher shift gives better accuracy. Find the largest
194 * shift such that mult fits in 32 bits.
195 */
196 for (shift = 0; shift < 16; shift++) {
197 r->shift = shift;
198 factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
199 mult = div64_u64(factor, r->rate_bps);
200 if (mult > UINT_MAX)
201 break;
202 }
203
204 r->shift = shift - 1;
205 factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
206 r->mult = div64_u64(factor, r->rate_bps);
207 }
208 }
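/* Worked example (illustrative numbers, not from the original source):
 * for rate_bps = 8,000,000 (1 Mbyte/s) mult never exceeds UINT_MAX, so the
 * loop runs to completion and shift settles at 15 with
 * mult = 8e9 * 2^15 / 8e6 = 32,768,000.  l2t_ns() for a 1500 byte packet
 * then yields (1500 * 32768000) >> 15 = 1,500,000 ns, i.e. the 1.5 ms it
 * takes to transmit 1500 bytes at 1 Mbyte/s.
 */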
209
210 /* find class in global hash table using given handle */
211 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
212 {
213 struct htb_sched *q = qdisc_priv(sch);
214 struct Qdisc_class_common *clc;
215
216 clc = qdisc_class_find(&q->clhash, handle);
217 if (clc == NULL)
218 return NULL;
219 return container_of(clc, struct htb_class, common);
220 }
221
222 /**
223 * htb_classify - classify a packet into class
224 *
225 * It returns NULL if the packet should be dropped or -1 if the packet
226 * should be passed directly thru. In all other cases leaf class is returned.
227 * We allow direct class selection by classid in skb->priority. Then we examine
228 * filters in qdisc and in inner nodes (if a higher filter points to the inner
229 * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
230 * internal fifo (direct). These packets then go directly thru. If we still
231 * have no valid leaf we try to use the MAJOR:default leaf. If that is still
232 * unsuccessful we finish and return the direct queue.
233 */
234 #define HTB_DIRECT ((struct htb_class *)-1L)
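/* Example (illustrative, assuming the qdisc was created with handle 1:):
 * an application that sets skb->priority to 0x10010 (1:10) bypasses all
 * filters and is queued straight to leaf 1:10, while skb->priority equal
 * to 0x10000 (1:0, i.e. the qdisc handle itself) selects HTB_DIRECT and
 * the packet is sent through the direct queue unshaped.
 */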
235
236 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
237 int *qerr)
238 {
239 struct htb_sched *q = qdisc_priv(sch);
240 struct htb_class *cl;
241 struct tcf_result res;
242 struct tcf_proto *tcf;
243 int result;
244
245 /* allow selecting the class by setting skb->priority to a valid classid;
246 * note that nfmark can be used too by attaching filter fw with no
247 * rules in it
248 */
249 if (skb->priority == sch->handle)
250 return HTB_DIRECT; /* X:0 (direct flow) selected */
251 cl = htb_find(skb->priority, sch);
252 if (cl && cl->level == 0)
253 return cl;
254
255 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
256 tcf = q->filter_list;
257 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
258 #ifdef CONFIG_NET_CLS_ACT
259 switch (result) {
260 case TC_ACT_QUEUED:
261 case TC_ACT_STOLEN:
262 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
263 case TC_ACT_SHOT:
264 return NULL;
265 }
266 #endif
267 cl = (void *)res.class;
268 if (!cl) {
269 if (res.classid == sch->handle)
270 return HTB_DIRECT; /* X:0 (direct flow) */
271 cl = htb_find(res.classid, sch);
272 if (!cl)
273 break; /* filter selected invalid classid */
274 }
275 if (!cl->level)
276 return cl; /* we hit leaf; return it */
277
278 /* we have got inner class; apply inner filter chain */
279 tcf = cl->filter_list;
280 }
281 /* classification failed; try to use default class */
282 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
283 if (!cl || cl->level)
284 return HTB_DIRECT; /* bad default .. this is safe bet */
285 return cl;
286 }
287
288 /**
289 * htb_add_to_id_tree - adds class to the round robin list
290 *
291 * Routine adds class to the list (actually tree) sorted by classid.
292 * Make sure that class is not already on such list for given prio.
293 */
294 static void htb_add_to_id_tree(struct rb_root *root,
295 struct htb_class *cl, int prio)
296 {
297 struct rb_node **p = &root->rb_node, *parent = NULL;
298
299 while (*p) {
300 struct htb_class *c;
301 parent = *p;
302 c = rb_entry(parent, struct htb_class, node[prio]);
303
304 if (cl->common.classid > c->common.classid)
305 p = &parent->rb_right;
306 else
307 p = &parent->rb_left;
308 }
309 rb_link_node(&cl->node[prio], parent, p);
310 rb_insert_color(&cl->node[prio], root);
311 }
312
313 /**
314 * htb_add_to_wait_tree - adds class to the event queue with delay
315 *
316 * The class is added to the priority event queue to indicate that the class
317 * will change its mode at time cl->pq_key. Make sure that the class is not
318 * already in the queue.
319 */
320 static void htb_add_to_wait_tree(struct htb_sched *q,
321 struct htb_class *cl, s64 delay)
322 {
323 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
324
325 cl->pq_key = q->now + delay;
326 if (cl->pq_key == q->now)
327 cl->pq_key++;
328
329 /* update the nearest event cache */
330 if (q->near_ev_cache[cl->level] > cl->pq_key)
331 q->near_ev_cache[cl->level] = cl->pq_key;
332
333 while (*p) {
334 struct htb_class *c;
335 parent = *p;
336 c = rb_entry(parent, struct htb_class, pq_node);
337 if (cl->pq_key >= c->pq_key)
338 p = &parent->rb_right;
339 else
340 p = &parent->rb_left;
341 }
342 rb_link_node(&cl->pq_node, parent, p);
343 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
344 }
345
346 /**
347 * htb_next_rb_node - finds next node in binary tree
348 *
349 * When we are past last key we return NULL.
350 * Average complexity is 2 steps per call.
351 */
352 static inline void htb_next_rb_node(struct rb_node **n)
353 {
354 *n = rb_next(*n);
355 }
356
357 /**
358 * htb_add_class_to_row - add class to its row
359 *
360 * The class is added to row at priorities marked in mask.
361 * It does nothing if mask == 0.
362 */
363 static inline void htb_add_class_to_row(struct htb_sched *q,
364 struct htb_class *cl, int mask)
365 {
366 q->row_mask[cl->level] |= mask;
367 while (mask) {
368 int prio = ffz(~mask);
369 mask &= ~(1 << prio);
370 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
371 }
372 }
373
374 /* If this triggers, it is a bug in this code, but it need not be fatal */
375 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
376 {
377 if (RB_EMPTY_NODE(rb)) {
378 WARN_ON(1);
379 } else {
380 rb_erase(rb, root);
381 RB_CLEAR_NODE(rb);
382 }
383 }
384
385
386 /**
387 * htb_remove_class_from_row - removes class from its row
388 *
389 * The class is removed from row at priorities marked in mask.
390 * It does nothing if mask == 0.
391 */
392 static inline void htb_remove_class_from_row(struct htb_sched *q,
393 struct htb_class *cl, int mask)
394 {
395 int m = 0;
396
397 while (mask) {
398 int prio = ffz(~mask);
399
400 mask &= ~(1 << prio);
401 if (q->ptr[cl->level][prio] == cl->node + prio)
402 htb_next_rb_node(q->ptr[cl->level] + prio);
403
404 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
405 if (!q->row[cl->level][prio].rb_node)
406 m |= 1 << prio;
407 }
408 q->row_mask[cl->level] &= ~m;
409 }
410
411 /**
412 * htb_activate_prios - creates active class's feed chain
413 *
414 * The class is connected to ancestors and/or appropriate rows
415 * for the priorities it is participating in. cl->cmode must be the new
416 * (activated) mode. It does nothing if cl->prio_activity == 0.
417 */
418 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
419 {
420 struct htb_class *p = cl->parent;
421 long m, mask = cl->prio_activity;
422
423 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
424 m = mask;
425 while (m) {
426 int prio = ffz(~m);
427 m &= ~(1 << prio);
428
429 if (p->un.inner.feed[prio].rb_node)
430 /* parent already has its feed in use so that
431 * reset bit in mask as parent is already ok
432 */
433 mask &= ~(1 << prio);
434
435 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
436 }
437 p->prio_activity |= mask;
438 cl = p;
439 p = cl->parent;
440
441 }
442 if (cl->cmode == HTB_CAN_SEND && mask)
443 htb_add_class_to_row(q, cl, mask);
444 }
445
446 /**
447 * htb_deactivate_prios - remove class from feed chain
448 *
449 * cl->cmode must represent old mode (before deactivation). It does
450 * nothing if cl->prio_activity == 0. Class is removed from all feed
451 * chains and rows.
452 */
453 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
454 {
455 struct htb_class *p = cl->parent;
456 long m, mask = cl->prio_activity;
457
458 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
459 m = mask;
460 mask = 0;
461 while (m) {
462 int prio = ffz(~m);
463 m &= ~(1 << prio);
464
465 if (p->un.inner.ptr[prio] == cl->node + prio) {
466 /* we are removing child which is pointed to from
467 * parent feed - forget the pointer but remember
468 * classid
469 */
470 p->un.inner.last_ptr_id[prio] = cl->common.classid;
471 p->un.inner.ptr[prio] = NULL;
472 }
473
474 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
475
476 if (!p->un.inner.feed[prio].rb_node)
477 mask |= 1 << prio;
478 }
479
480 p->prio_activity &= ~mask;
481 cl = p;
482 p = cl->parent;
483
484 }
485 if (cl->cmode == HTB_CAN_SEND && mask)
486 htb_remove_class_from_row(q, cl, mask);
487 }
488
489 static inline s64 htb_lowater(const struct htb_class *cl)
490 {
491 if (htb_hysteresis)
492 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
493 else
494 return 0;
495 }
496 static inline s64 htb_hiwater(const struct htb_class *cl)
497 {
498 if (htb_hysteresis)
499 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
500 else
501 return 0;
502 }
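/* Illustration of the hysteresis (assumed numbers): suppose a class has a
 * cbuffer worth 15000 ns of tokens.  With htb_hysteresis enabled, a class
 * that can currently send drops to HTB_CANT_SEND only once ctokens falls
 * below -15000 (htb_lowater), and once throttled it leaves that state only
 * when ctokens climbs back to 0.  With hysteresis disabled both thresholds
 * are 0, so a class hovering around empty flips modes far more often.
 */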
503
504
505 /**
506 * htb_class_mode - computes and returns current class mode
507 *
508 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
509 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
510 * from now to time when cl will change its state.
511 * Also it is worth noting that the class mode doesn't change simply
512 * at cl->{c,}tokens == 0 but rather there can be a hysteresis in the
513 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
514 * mode transitions per time unit. The speed gain is about 1/6.
515 */
516 static inline enum htb_cmode
517 htb_class_mode(struct htb_class *cl, s64 *diff)
518 {
519 s64 toks;
520
521 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
522 *diff = -toks;
523 return HTB_CANT_SEND;
524 }
525
526 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
527 return HTB_CAN_SEND;
528
529 *diff = -toks;
530 return HTB_MAY_BORROW;
531 }
532
533 /**
534 * htb_change_class_mode - changes class's mode
535 *
536 * This should be the only way to change a class's mode under normal
537 * circumstances. The routine will update feed list linkage, change mode
538 * and add the class to the wait event queue if appropriate. The new mode should
539 * be different from old one and cl->pq_key has to be valid if changing
540 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
541 */
542 static void
543 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
544 {
545 enum htb_cmode new_mode = htb_class_mode(cl, diff);
546
547 if (new_mode == cl->cmode)
548 return;
549
550 if (cl->prio_activity) { /* not necessary: speed optimization */
551 if (cl->cmode != HTB_CANT_SEND)
552 htb_deactivate_prios(q, cl);
553 cl->cmode = new_mode;
554 if (new_mode != HTB_CANT_SEND)
555 htb_activate_prios(q, cl);
556 } else
557 cl->cmode = new_mode;
558 }
559
560 /**
561 * htb_activate - inserts leaf cl into appropriate active feeds
562 *
563 * Routine learns (new) priority of leaf and activates feed chain
564 * for the prio. It can be called safely on an already active leaf.
565 * It also adds the leaf into the drop list.
566 */
567 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
568 {
569 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
570
571 if (!cl->prio_activity) {
572 cl->prio_activity = 1 << cl->prio;
573 htb_activate_prios(q, cl);
574 list_add_tail(&cl->un.leaf.drop_list,
575 q->drops + cl->prio);
576 }
577 }
578
579 /**
580 * htb_deactivate - remove leaf cl from active feeds
581 *
582 * Make sure that the leaf is active. In other words it can't be called
583 * with a non-active leaf. It also removes the class from the drop list.
584 */
585 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
586 {
587 WARN_ON(!cl->prio_activity);
588
589 htb_deactivate_prios(q, cl);
590 cl->prio_activity = 0;
591 list_del_init(&cl->un.leaf.drop_list);
592 }
593
594 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
595 {
596 int uninitialized_var(ret);
597 struct htb_sched *q = qdisc_priv(sch);
598 struct htb_class *cl = htb_classify(skb, sch, &ret);
599
600 if (cl == HTB_DIRECT) {
601 /* enqueue to helper queue */
602 if (q->direct_queue.qlen < q->direct_qlen) {
603 __skb_queue_tail(&q->direct_queue, skb);
604 q->direct_pkts++;
605 } else {
606 return qdisc_drop(skb, sch);
607 }
608 #ifdef CONFIG_NET_CLS_ACT
609 } else if (!cl) {
610 if (ret & __NET_XMIT_BYPASS)
611 sch->qstats.drops++;
612 kfree_skb(skb);
613 return ret;
614 #endif
615 } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
616 if (net_xmit_drop_count(ret)) {
617 sch->qstats.drops++;
618 cl->qstats.drops++;
619 }
620 return ret;
621 } else {
622 htb_activate(q, cl);
623 }
624
625 sch->q.qlen++;
626 return NET_XMIT_SUCCESS;
627 }
628
629 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
630 {
631 s64 toks = diff + cl->tokens;
632
633 if (toks > cl->buffer)
634 toks = cl->buffer;
635 toks -= (s64) l2t_ns(&cl->rate, bytes);
636 if (toks <= -cl->mbuffer)
637 toks = 1 - cl->mbuffer;
638
639 cl->tokens = toks;
640 }
641
642 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
643 {
644 s64 toks = diff + cl->ctokens;
645
646 if (toks > cl->cbuffer)
647 toks = cl->cbuffer;
648 toks -= (s64) l2t_ns(&cl->ceil, bytes);
649 if (toks <= -cl->mbuffer)
650 toks = 1 - cl->mbuffer;
651
652 cl->ctokens = toks;
653 }
654
655 /**
656 * htb_charge_class - charges amount "bytes" to leaf and ancestors
657 *
658 * Routine assumes that packet "bytes" long was dequeued from leaf cl
659 * borrowing from "level". It accounts bytes to ceil leaky bucket for
660 * leaf and all ancestors and to rate bucket for ancestors at levels
661 * "level" and higher. It also handles possible change of mode resulting
662 * from the update. Note that mode can also increase here (MAY_BORROW to
663 * CAN_SEND) because we can use a more precise clock than the event queue here.
664 * In such a case we remove the class from the event queue first.
665 */
666 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
667 int level, struct sk_buff *skb)
668 {
669 int bytes = qdisc_pkt_len(skb);
670 enum htb_cmode old_mode;
671 s64 diff;
672
673 while (cl) {
674 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
675 if (cl->level >= level) {
676 if (cl->level == level)
677 cl->xstats.lends++;
678 htb_accnt_tokens(cl, bytes, diff);
679 } else {
680 cl->xstats.borrows++;
681 cl->tokens += diff; /* we moved t_c; update tokens */
682 }
683 htb_accnt_ctokens(cl, bytes, diff);
684 cl->t_c = q->now;
685
686 old_mode = cl->cmode;
687 diff = 0;
688 htb_change_class_mode(q, cl, &diff);
689 if (old_mode != cl->cmode) {
690 if (old_mode != HTB_CAN_SEND)
691 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
692 if (cl->cmode != HTB_CAN_SEND)
693 htb_add_to_wait_tree(q, cl, diff);
694 }
695
696 /* update basic stats except for leaves which are already updated */
697 if (cl->level)
698 bstats_update(&cl->bstats, skb);
699
700 cl = cl->parent;
701 }
702 }
703
704 /**
705 * htb_do_events - make mode changes to classes at the level
706 *
707 * Scans event queue for pending events and applies them. Returns time of
708 * next pending event (0 for no event in pq, q->now for too many events).
709 * Note: Applied are events that have cl->pq_key <= q->now.
710 */
711 static psched_time_t htb_do_events(struct htb_sched *q, int level,
712 unsigned long start)
713 {
714 /* don't run for longer than 2 jiffies; 2 is used instead of
715 * 1 to simplify things when jiffy is going to be incremented
716 * too soon
717 */
718 unsigned long stop_at = start + 2;
719 while (time_before(jiffies, stop_at)) {
720 struct htb_class *cl;
721 s64 diff;
722 struct rb_node *p = rb_first(&q->wait_pq[level]);
723
724 if (!p)
725 return 0;
726
727 cl = rb_entry(p, struct htb_class, pq_node);
728 if (cl->pq_key > q->now)
729 return cl->pq_key;
730
731 htb_safe_rb_erase(p, q->wait_pq + level);
732 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
733 htb_change_class_mode(q, cl, &diff);
734 if (cl->cmode != HTB_CAN_SEND)
735 htb_add_to_wait_tree(q, cl, diff);
736 }
737
738 /* too much load - let's continue after a break for scheduling */
739 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
740 pr_warning("htb: too many events!\n");
741 q->warned |= HTB_WARN_TOOMANYEVENTS;
742 }
743
744 return q->now;
745 }
746
747 /* Returns class->node+prio from the id-tree where the class's id is >= id,
748 * or NULL if no such class exists.
749 */
750 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
751 u32 id)
752 {
753 struct rb_node *r = NULL;
754 while (n) {
755 struct htb_class *cl =
756 rb_entry(n, struct htb_class, node[prio]);
757
758 if (id > cl->common.classid) {
759 n = n->rb_right;
760 } else if (id < cl->common.classid) {
761 r = n;
762 n = n->rb_left;
763 } else {
764 return n;
765 }
766 }
767 return r;
768 }
769
770 /**
771 * htb_lookup_leaf - returns next leaf class in DRR order
772 *
773 * Find the leaf that the current feed pointer points to.
774 */
775 static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
776 struct rb_node **pptr, u32 * pid)
777 {
778 int i;
779 struct {
780 struct rb_node *root;
781 struct rb_node **pptr;
782 u32 *pid;
783 } stk[TC_HTB_MAXDEPTH], *sp = stk;
784
785 BUG_ON(!tree->rb_node);
786 sp->root = tree->rb_node;
787 sp->pptr = pptr;
788 sp->pid = pid;
789
790 for (i = 0; i < 65535; i++) {
791 if (!*sp->pptr && *sp->pid) {
792 /* ptr was invalidated but id is valid - try to recover
793 * the original or next ptr
794 */
795 *sp->pptr =
796 htb_id_find_next_upper(prio, sp->root, *sp->pid);
797 }
798 *sp->pid = 0; /* ptr is valid now so remove this hint as it
799 * can become out of date quickly
800 */
801 if (!*sp->pptr) { /* we are at right end; rewind & go up */
802 *sp->pptr = sp->root;
803 while ((*sp->pptr)->rb_left)
804 *sp->pptr = (*sp->pptr)->rb_left;
805 if (sp > stk) {
806 sp--;
807 if (!*sp->pptr) {
808 WARN_ON(1);
809 return NULL;
810 }
811 htb_next_rb_node(sp->pptr);
812 }
813 } else {
814 struct htb_class *cl;
815 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
816 if (!cl->level)
817 return cl;
818 (++sp)->root = cl->un.inner.feed[prio].rb_node;
819 sp->pptr = cl->un.inner.ptr + prio;
820 sp->pid = cl->un.inner.last_ptr_id + prio;
821 }
822 }
823 WARN_ON(1);
824 return NULL;
825 }
826
827 /* dequeues packet at given priority and level; call only if
828 * you are sure that there is active class at prio/level
829 */
830 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
831 int level)
832 {
833 struct sk_buff *skb = NULL;
834 struct htb_class *cl, *start;
835 /* look initial class up in the row */
836 start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
837 q->ptr[level] + prio,
838 q->last_ptr_id[level] + prio);
839
840 do {
841 next:
842 if (unlikely(!cl))
843 return NULL;
844
845 /* class can be empty - it is unlikely but can be true if leaf
846 * qdisc drops packets in enqueue routine or if someone used
847 * graft operation on the leaf since last dequeue;
848 * simply deactivate and skip such class
849 */
850 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
851 struct htb_class *next;
852 htb_deactivate(q, cl);
853
854 /* row/level might become empty */
855 if ((q->row_mask[level] & (1 << prio)) == 0)
856 return NULL;
857
858 next = htb_lookup_leaf(q->row[level] + prio,
859 prio, q->ptr[level] + prio,
860 q->last_ptr_id[level] + prio);
861
862 if (cl == start) /* fix start if we just deleted it */
863 start = next;
864 cl = next;
865 goto next;
866 }
867
868 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
869 if (likely(skb != NULL))
870 break;
871
872 qdisc_warn_nonwc("htb", cl->un.leaf.q);
873 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
874 ptr[0]) + prio);
875 cl = htb_lookup_leaf(q->row[level] + prio, prio,
876 q->ptr[level] + prio,
877 q->last_ptr_id[level] + prio);
878
879 } while (cl != start);
880
881 if (likely(skb != NULL)) {
882 bstats_update(&cl->bstats, skb);
883 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
884 if (cl->un.leaf.deficit[level] < 0) {
885 cl->un.leaf.deficit[level] += cl->quantum;
886 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
887 ptr[0]) + prio);
888 }
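/* Example (assumed quantum of 10000 bytes): dequeuing a 1500 byte
 * packet lowers the deficit by 1500; only once the deficit turns
 * negative is it replenished by quantum and the round-robin pointer
 * advanced to the next leaf, which gives byte-accurate DRR.
 */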
889 /* this used to be after charge_class but this constellation
890 * gives us slightly better performance
891 */
892 if (!cl->un.leaf.q->q.qlen)
893 htb_deactivate(q, cl);
894 htb_charge_class(q, cl, level, skb);
895 }
896 return skb;
897 }
898
899 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
900 {
901 struct sk_buff *skb;
902 struct htb_sched *q = qdisc_priv(sch);
903 int level;
904 psched_time_t next_event;
905 unsigned long start_at;
906
907 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
908 skb = __skb_dequeue(&q->direct_queue);
909 if (skb != NULL) {
910 ok:
911 qdisc_bstats_update(sch, skb);
912 qdisc_unthrottled(sch);
913 sch->q.qlen--;
914 return skb;
915 }
916
917 if (!sch->q.qlen)
918 goto fin;
919 q->now = ktime_to_ns(ktime_get());
920 start_at = jiffies;
921
922 next_event = q->now + 5LLU * NSEC_PER_SEC;
923
924 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
925 /* common case optimization - skip event handler quickly */
926 int m;
927 psched_time_t event;
928
929 if (q->now >= q->near_ev_cache[level]) {
930 event = htb_do_events(q, level, start_at);
931 if (!event)
932 event = q->now + NSEC_PER_SEC;
933 q->near_ev_cache[level] = event;
934 } else
935 event = q->near_ev_cache[level];
936
937 if (next_event > event)
938 next_event = event;
939
940 m = ~q->row_mask[level];
941 while (m != (int)(-1)) {
942 int prio = ffz(m);
943
944 m |= 1 << prio;
945 skb = htb_dequeue_tree(q, prio, level);
946 if (likely(skb != NULL))
947 goto ok;
948 }
949 }
950 sch->qstats.overlimits++;
951 if (likely(next_event > q->now)) {
952 if (!test_bit(__QDISC_STATE_DEACTIVATED,
953 &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
954 ktime_t time = ns_to_ktime(next_event);
955 qdisc_throttled(q->watchdog.qdisc);
956 hrtimer_start(&q->watchdog.timer, time,
957 HRTIMER_MODE_ABS);
958 }
959 } else {
960 schedule_work(&q->work);
961 }
962 fin:
963 return skb;
964 }
965
966 /* try to drop from each class (by prio) until one succeeds */
967 static unsigned int htb_drop(struct Qdisc *sch)
968 {
969 struct htb_sched *q = qdisc_priv(sch);
970 int prio;
971
972 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
973 struct list_head *p;
974 list_for_each(p, q->drops + prio) {
975 struct htb_class *cl = list_entry(p, struct htb_class,
976 un.leaf.drop_list);
977 unsigned int len;
978 if (cl->un.leaf.q->ops->drop &&
979 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
980 sch->q.qlen--;
981 if (!cl->un.leaf.q->q.qlen)
982 htb_deactivate(q, cl);
983 return len;
984 }
985 }
986 }
987 return 0;
988 }
989
990 /* reset all classes */
991 /* always called under BH & queue lock */
992 static void htb_reset(struct Qdisc *sch)
993 {
994 struct htb_sched *q = qdisc_priv(sch);
995 struct htb_class *cl;
996 struct hlist_node *n;
997 unsigned int i;
998
999 for (i = 0; i < q->clhash.hashsize; i++) {
1000 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1001 if (cl->level)
1002 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
1003 else {
1004 if (cl->un.leaf.q)
1005 qdisc_reset(cl->un.leaf.q);
1006 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1007 }
1008 cl->prio_activity = 0;
1009 cl->cmode = HTB_CAN_SEND;
1010
1011 }
1012 }
1013 qdisc_watchdog_cancel(&q->watchdog);
1014 __skb_queue_purge(&q->direct_queue);
1015 sch->q.qlen = 0;
1016 memset(q->row, 0, sizeof(q->row));
1017 memset(q->row_mask, 0, sizeof(q->row_mask));
1018 memset(q->wait_pq, 0, sizeof(q->wait_pq));
1019 memset(q->ptr, 0, sizeof(q->ptr));
1020 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1021 INIT_LIST_HEAD(q->drops + i);
1022 }
1023
1024 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1025 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1026 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
1027 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1028 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1029 };
1030
1031 static void htb_work_func(struct work_struct *work)
1032 {
1033 struct htb_sched *q = container_of(work, struct htb_sched, work);
1034 struct Qdisc *sch = q->watchdog.qdisc;
1035
1036 __netif_schedule(qdisc_root(sch));
1037 }
1038
1039 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1040 {
1041 struct htb_sched *q = qdisc_priv(sch);
1042 struct nlattr *tb[TCA_HTB_INIT + 1];
1043 struct tc_htb_glob *gopt;
1044 int err;
1045 int i;
1046
1047 if (!opt)
1048 return -EINVAL;
1049
1050 err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
1051 if (err < 0)
1052 return err;
1053
1054 if (tb[TCA_HTB_INIT] == NULL) {
1055 pr_err("HTB: hey probably you have bad tc tool ?\n");
1056 return -EINVAL;
1057 }
1058 gopt = nla_data(tb[TCA_HTB_INIT]);
1059 if (gopt->version != HTB_VER >> 16) {
1060 pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
1061 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1062 return -EINVAL;
1063 }
1064
1065 err = qdisc_class_hash_init(&q->clhash);
1066 if (err < 0)
1067 return err;
1068 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1069 INIT_LIST_HEAD(q->drops + i);
1070
1071 qdisc_watchdog_init(&q->watchdog, sch);
1072 INIT_WORK(&q->work, htb_work_func);
1073 skb_queue_head_init(&q->direct_queue);
1074
1075 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1076 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1077 q->direct_qlen = 2;
1078
1079 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1080 q->rate2quantum = 1;
1081 q->defcls = gopt->defcls;
1082
1083 return 0;
1084 }
1085
1086 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1087 {
1088 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1089 struct htb_sched *q = qdisc_priv(sch);
1090 struct nlattr *nest;
1091 struct tc_htb_glob gopt;
1092
1093 spin_lock_bh(root_lock);
1094
1095 gopt.direct_pkts = q->direct_pkts;
1096 gopt.version = HTB_VER;
1097 gopt.rate2quantum = q->rate2quantum;
1098 gopt.defcls = q->defcls;
1099 gopt.debug = 0;
1100
1101 nest = nla_nest_start(skb, TCA_OPTIONS);
1102 if (nest == NULL)
1103 goto nla_put_failure;
1104 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt))
1105 goto nla_put_failure;
1106 nla_nest_end(skb, nest);
1107
1108 spin_unlock_bh(root_lock);
1109 return skb->len;
1110
1111 nla_put_failure:
1112 spin_unlock_bh(root_lock);
1113 nla_nest_cancel(skb, nest);
1114 return -1;
1115 }
1116
1117 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1118 struct sk_buff *skb, struct tcmsg *tcm)
1119 {
1120 struct htb_class *cl = (struct htb_class *)arg;
1121 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1122 struct nlattr *nest;
1123 struct tc_htb_opt opt;
1124
1125 spin_lock_bh(root_lock);
1126 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1127 tcm->tcm_handle = cl->common.classid;
1128 if (!cl->level && cl->un.leaf.q)
1129 tcm->tcm_info = cl->un.leaf.q->handle;
1130
1131 nest = nla_nest_start(skb, TCA_OPTIONS);
1132 if (nest == NULL)
1133 goto nla_put_failure;
1134
1135 memset(&opt, 0, sizeof(opt));
1136
1137 opt.rate.rate = cl->rate.rate_bps >> 3;
1138 opt.buffer = cl->buffer;
1139 opt.ceil.rate = cl->ceil.rate_bps >> 3;
1140 opt.cbuffer = cl->cbuffer;
1141 opt.quantum = cl->quantum;
1142 opt.prio = cl->prio;
1143 opt.level = cl->level;
1144 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1145 goto nla_put_failure;
1146
1147 nla_nest_end(skb, nest);
1148 spin_unlock_bh(root_lock);
1149 return skb->len;
1150
1151 nla_put_failure:
1152 spin_unlock_bh(root_lock);
1153 nla_nest_cancel(skb, nest);
1154 return -1;
1155 }
1156
1157 static int
1158 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1159 {
1160 struct htb_class *cl = (struct htb_class *)arg;
1161
1162 if (!cl->level && cl->un.leaf.q)
1163 cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1164 cl->xstats.tokens = cl->tokens;
1165 cl->xstats.ctokens = cl->ctokens;
1166
1167 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1168 gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
1169 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1170 return -1;
1171
1172 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1173 }
1174
1175 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1176 struct Qdisc **old)
1177 {
1178 struct htb_class *cl = (struct htb_class *)arg;
1179
1180 if (cl->level)
1181 return -EINVAL;
1182 if (new == NULL &&
1183 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1184 cl->common.classid)) == NULL)
1185 return -ENOBUFS;
1186
1187 sch_tree_lock(sch);
1188 *old = cl->un.leaf.q;
1189 cl->un.leaf.q = new;
1190 if (*old != NULL) {
1191 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1192 qdisc_reset(*old);
1193 }
1194 sch_tree_unlock(sch);
1195 return 0;
1196 }
1197
1198 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1199 {
1200 struct htb_class *cl = (struct htb_class *)arg;
1201 return !cl->level ? cl->un.leaf.q : NULL;
1202 }
1203
1204 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1205 {
1206 struct htb_class *cl = (struct htb_class *)arg;
1207
1208 if (cl->un.leaf.q->q.qlen == 0)
1209 htb_deactivate(qdisc_priv(sch), cl);
1210 }
1211
1212 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1213 {
1214 struct htb_class *cl = htb_find(classid, sch);
1215 if (cl)
1216 cl->refcnt++;
1217 return (unsigned long)cl;
1218 }
1219
1220 static inline int htb_parent_last_child(struct htb_class *cl)
1221 {
1222 if (!cl->parent)
1223 /* the root class */
1224 return 0;
1225 if (cl->parent->children > 1)
1226 /* not the last child */
1227 return 0;
1228 return 1;
1229 }
1230
1231 static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1232 struct Qdisc *new_q)
1233 {
1234 struct htb_class *parent = cl->parent;
1235
1236 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1237
1238 if (parent->cmode != HTB_CAN_SEND)
1239 htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1240
1241 parent->level = 0;
1242 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1243 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1244 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1245 parent->tokens = parent->buffer;
1246 parent->ctokens = parent->cbuffer;
1247 parent->t_c = psched_get_time();
1248 parent->cmode = HTB_CAN_SEND;
1249 }
1250
1251 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1252 {
1253 if (!cl->level) {
1254 WARN_ON(!cl->un.leaf.q);
1255 qdisc_destroy(cl->un.leaf.q);
1256 }
1257 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1258 tcf_destroy_chain(&cl->filter_list);
1259 kfree(cl);
1260 }
1261
1262 static void htb_destroy(struct Qdisc *sch)
1263 {
1264 struct htb_sched *q = qdisc_priv(sch);
1265 struct hlist_node *n, *next;
1266 struct htb_class *cl;
1267 unsigned int i;
1268
1269 cancel_work_sync(&q->work);
1270 qdisc_watchdog_cancel(&q->watchdog);
1271 /* This line used to be after htb_destroy_class call below
1272 * and surprisingly it worked in 2.4. But it must precede it
1273 * because filters need their target class alive to be able to call
1274 * unbind_filter on them (without an Oops).
1275 */
1276 tcf_destroy_chain(&q->filter_list);
1277
1278 for (i = 0; i < q->clhash.hashsize; i++) {
1279 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1280 tcf_destroy_chain(&cl->filter_list);
1281 }
1282 for (i = 0; i < q->clhash.hashsize; i++) {
1283 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1284 common.hnode)
1285 htb_destroy_class(sch, cl);
1286 }
1287 qdisc_class_hash_destroy(&q->clhash);
1288 __skb_queue_purge(&q->direct_queue);
1289 }
1290
1291 static int htb_delete(struct Qdisc *sch, unsigned long arg)
1292 {
1293 struct htb_sched *q = qdisc_priv(sch);
1294 struct htb_class *cl = (struct htb_class *)arg;
1295 unsigned int qlen;
1296 struct Qdisc *new_q = NULL;
1297 int last_child = 0;
1298
1299 // TODO: why don't we allow deleting a subtree ? references ? does
1300 // the tc subsys guarantee us that in htb_destroy it holds no class
1301 // refs so that we can remove children safely there ?
1302 if (cl->children || cl->filter_cnt)
1303 return -EBUSY;
1304
1305 if (!cl->level && htb_parent_last_child(cl)) {
1306 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1307 cl->parent->common.classid);
1308 last_child = 1;
1309 }
1310
1311 sch_tree_lock(sch);
1312
1313 if (!cl->level) {
1314 qlen = cl->un.leaf.q->q.qlen;
1315 qdisc_reset(cl->un.leaf.q);
1316 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1317 }
1318
1319 /* delete from hash and active; remainder in destroy_class */
1320 qdisc_class_hash_remove(&q->clhash, &cl->common);
1321 if (cl->parent)
1322 cl->parent->children--;
1323
1324 if (cl->prio_activity)
1325 htb_deactivate(q, cl);
1326
1327 if (cl->cmode != HTB_CAN_SEND)
1328 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1329
1330 if (last_child)
1331 htb_parent_to_leaf(q, cl, new_q);
1332
1333 BUG_ON(--cl->refcnt == 0);
1334 /*
1335 * This shouldn't happen: we "hold" one cops->get() when called
1336 * from tc_ctl_tclass; the destroy method is done from cops->put().
1337 */
1338
1339 sch_tree_unlock(sch);
1340 return 0;
1341 }
1342
1343 static void htb_put(struct Qdisc *sch, unsigned long arg)
1344 {
1345 struct htb_class *cl = (struct htb_class *)arg;
1346
1347 if (--cl->refcnt == 0)
1348 htb_destroy_class(sch, cl);
1349 }
1350
1351 static int htb_change_class(struct Qdisc *sch, u32 classid,
1352 u32 parentid, struct nlattr **tca,
1353 unsigned long *arg)
1354 {
1355 int err = -EINVAL;
1356 struct htb_sched *q = qdisc_priv(sch);
1357 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1358 struct nlattr *opt = tca[TCA_OPTIONS];
1359 struct nlattr *tb[__TCA_HTB_MAX];
1360 struct tc_htb_opt *hopt;
1361
1362 /* extract all subattrs from opt attr */
1363 if (!opt)
1364 goto failure;
1365
1366 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1367 if (err < 0)
1368 goto failure;
1369
1370 err = -EINVAL;
1371 if (tb[TCA_HTB_PARMS] == NULL)
1372 goto failure;
1373
1374 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1375
1376 hopt = nla_data(tb[TCA_HTB_PARMS]);
1377 if (!hopt->rate.rate || !hopt->ceil.rate)
1378 goto failure;
1379
1380 if (!cl) { /* new class */
1381 struct Qdisc *new_q;
1382 int prio;
1383 struct {
1384 struct nlattr nla;
1385 struct gnet_estimator opt;
1386 } est = {
1387 .nla = {
1388 .nla_len = nla_attr_size(sizeof(est.opt)),
1389 .nla_type = TCA_RATE,
1390 },
1391 .opt = {
1392 /* 4s interval, 16s averaging constant */
1393 .interval = 2,
1394 .ewma_log = 2,
1395 },
1396 };
1397
1398 /* check for valid classid */
1399 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1400 htb_find(classid, sch))
1401 goto failure;
1402
1403 /* check maximal depth */
1404 if (parent && parent->parent && parent->parent->level < 2) {
1405 pr_err("htb: tree is too deep\n");
1406 goto failure;
1407 }
1408 err = -ENOBUFS;
1409 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1410 if (!cl)
1411 goto failure;
1412
1413 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1414 qdisc_root_sleeping_lock(sch),
1415 tca[TCA_RATE] ? : &est.nla);
1416 if (err) {
1417 kfree(cl);
1418 goto failure;
1419 }
1420
1421 cl->refcnt = 1;
1422 cl->children = 0;
1423 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1424 RB_CLEAR_NODE(&cl->pq_node);
1425
1426 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1427 RB_CLEAR_NODE(&cl->node[prio]);
1428
1429 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1430 * so it can't be used inside of sch_tree_lock
1431 * -- thanks to Karlis Peisenieks
1432 */
1433 new_q = qdisc_create_dflt(sch->dev_queue,
1434 &pfifo_qdisc_ops, classid);
1435 sch_tree_lock(sch);
1436 if (parent && !parent->level) {
1437 unsigned int qlen = parent->un.leaf.q->q.qlen;
1438
1439 /* turn parent into inner node */
1440 qdisc_reset(parent->un.leaf.q);
1441 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1442 qdisc_destroy(parent->un.leaf.q);
1443 if (parent->prio_activity)
1444 htb_deactivate(q, parent);
1445
1446 /* remove from evt list because of level change */
1447 if (parent->cmode != HTB_CAN_SEND) {
1448 htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1449 parent->cmode = HTB_CAN_SEND;
1450 }
1451 parent->level = (parent->parent ? parent->parent->level
1452 : TC_HTB_MAXDEPTH) - 1;
1453 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1454 }
1455 /* leaf (we) needs elementary qdisc */
1456 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1457
1458 cl->common.classid = classid;
1459 cl->parent = parent;
1460
1461 /* set class to be in HTB_CAN_SEND state */
1462 cl->tokens = hopt->buffer;
1463 cl->ctokens = hopt->cbuffer;
1464 cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
1465 cl->t_c = psched_get_time();
1466 cl->cmode = HTB_CAN_SEND;
1467
1468 /* attach to the hash list and parent's family */
1469 qdisc_class_hash_insert(&q->clhash, &cl->common);
1470 if (parent)
1471 parent->children++;
1472 } else {
1473 if (tca[TCA_RATE]) {
1474 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1475 qdisc_root_sleeping_lock(sch),
1476 tca[TCA_RATE]);
1477 if (err)
1478 return err;
1479 }
1480 sch_tree_lock(sch);
1481 }
1482
1483 /* there used to be a nasty bug here: we have to check that the node
1484 * is really a leaf before changing cl->un.leaf !
1485 */
1486 if (!cl->level) {
1487 cl->quantum = hopt->rate.rate / q->rate2quantum;
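/* Example (tc's usual default r2q is 10): a class configured with
 * rate 800kbit, i.e. hopt->rate.rate == 100000 bytes/s, gets
 * quantum = 100000 / 10 = 10000 bytes per DRR round unless the
 * user supplied an explicit quantum.
 */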
1488 if (!hopt->quantum && cl->quantum < 1000) {
1489 pr_warning(
1490 "HTB: quantum of class %X is small. Consider r2q change.\n",
1491 cl->common.classid);
1492 cl->quantum = 1000;
1493 }
1494 if (!hopt->quantum && cl->quantum > 200000) {
1495 pr_warning(
1496 "HTB: quantum of class %X is big. Consider r2q change.\n",
1497 cl->common.classid);
1498 cl->quantum = 200000;
1499 }
1500 if (hopt->quantum)
1501 cl->quantum = hopt->quantum;
1502 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1503 cl->prio = TC_HTB_NUMPRIO - 1;
1504 }
1505
1506 cl->buffer = hopt->buffer;
1507 cl->cbuffer = hopt->cbuffer;
1508
1509 cl->rate.rate_bps = (u64)hopt->rate.rate << 3;
1510 cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3;
1511
1512 htb_precompute_ratedata(&cl->rate);
1513 htb_precompute_ratedata(&cl->ceil);
1514
1515 cl->buffer = hopt->buffer << PSCHED_SHIFT;
1516 cl->cbuffer = hopt->cbuffer << PSCHED_SHIFT;
1517
1518 sch_tree_unlock(sch);
1519
1520 qdisc_class_hash_grow(sch, &q->clhash);
1521
1522 *arg = (unsigned long)cl;
1523 return 0;
1524
1525 failure:
1526 return err;
1527 }
1528
1529 static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1530 {
1531 struct htb_sched *q = qdisc_priv(sch);
1532 struct htb_class *cl = (struct htb_class *)arg;
1533 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1534
1535 return fl;
1536 }
1537
1538 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1539 u32 classid)
1540 {
1541 struct htb_class *cl = htb_find(classid, sch);
1542
1543 /*if (cl && !cl->level) return 0;
1544 * The line above used to be there to prevent attaching filters to
1545 * leaves. But at least tc_index filter uses this just to get class
1546 * for other reasons so we have to allow it.
1547 * ----
1548 * 19.6.2002 As Werner explained it is ok - bind filter is just
1549 * another way to "lock" the class - unlike "get" this lock can
1550 * be broken by class during destroy IIUC.
1551 */
1552 if (cl)
1553 cl->filter_cnt++;
1554 return (unsigned long)cl;
1555 }
1556
1557 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1558 {
1559 struct htb_class *cl = (struct htb_class *)arg;
1560
1561 if (cl)
1562 cl->filter_cnt--;
1563 }
1564
1565 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1566 {
1567 struct htb_sched *q = qdisc_priv(sch);
1568 struct htb_class *cl;
1569 struct hlist_node *n;
1570 unsigned int i;
1571
1572 if (arg->stop)
1573 return;
1574
1575 for (i = 0; i < q->clhash.hashsize; i++) {
1576 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1577 if (arg->count < arg->skip) {
1578 arg->count++;
1579 continue;
1580 }
1581 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1582 arg->stop = 1;
1583 return;
1584 }
1585 arg->count++;
1586 }
1587 }
1588 }
1589
1590 static const struct Qdisc_class_ops htb_class_ops = {
1591 .graft = htb_graft,
1592 .leaf = htb_leaf,
1593 .qlen_notify = htb_qlen_notify,
1594 .get = htb_get,
1595 .put = htb_put,
1596 .change = htb_change_class,
1597 .delete = htb_delete,
1598 .walk = htb_walk,
1599 .tcf_chain = htb_find_tcf,
1600 .bind_tcf = htb_bind_filter,
1601 .unbind_tcf = htb_unbind_filter,
1602 .dump = htb_dump_class,
1603 .dump_stats = htb_dump_class_stats,
1604 };
1605
1606 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1607 .cl_ops = &htb_class_ops,
1608 .id = "htb",
1609 .priv_size = sizeof(struct htb_sched),
1610 .enqueue = htb_enqueue,
1611 .dequeue = htb_dequeue,
1612 .peek = qdisc_peek_dequeued,
1613 .drop = htb_drop,
1614 .init = htb_init,
1615 .reset = htb_reset,
1616 .destroy = htb_destroy,
1617 .dump = htb_dump,
1618 .owner = THIS_MODULE,
1619 };
1620
1621 static int __init htb_module_init(void)
1622 {
1623 return register_qdisc(&htb_qdisc_ops);
1624 }
1625 static void __exit htb_module_exit(void)
1626 {
1627 unregister_qdisc(&htb_qdisc_ops);
1628 }
1629
1630 module_init(htb_module_init)
1631 module_exit(htb_module_exit)
1632 MODULE_LICENSE("GPL");