2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <net/netlink.h>
21 #include <net/pkt_sched.h>
24 /* Class-Based Queueing (CBQ) algorithm.
25 =======================================
27 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
28 Management Models for Packet Networks",
29 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
31 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
33 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
36 [4] Sally Floyd and Michael Speer, "Experimental Results
37 for Class-Based Queueing", 1998, not published.
39 -----------------------------------------------------------------------
41 Algorithm skeleton was taken from NS simulator cbq.cc.
42 If someone wants to check this code against the LBL version,
43 he should take into account that ONLY the skeleton was borrowed,
44 the implementation is different. Particularly:
46 --- The WRR algorithm is different. Our version looks more
47 reasonable (I hope) and works when quanta are allowed to be
48 less than MTU, which is always the case when real time classes
49 have small rates. Note, that the statement of [3] is
50 incomplete, delay may actually be estimated even if class
51 per-round allotment is less than MTU. Namely, if per-round
52 allotment is W*r_i, and r_1+...+r_k = r < 1
54 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
56 In the worst case we have IntServ estimate with D = W*r+k*MTU
57 and C = MTU*r. The proof (if correct at all) is trivial.
60 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
61 interpret some places, which look like wrong translations
62 from NS. Anyone is advised to find these differences
63 and explain to me, why I am wrong 8).
65 --- Linux has no EOI event, so that we cannot estimate true class
66 idle time. Workaround is to consider the next dequeue event
67 as sign that previous packet is finished. This is wrong because of
68 internal device queueing, but on a permanently loaded link it is true.
69 Moreover, combined with clock integrator, this scheme looks
70 very close to an ideal solution. */
72 struct cbq_sched_data
;
76 struct Qdisc_class_common common
;
77 struct cbq_class
*next_alive
; /* next class with backlog in this priority band */
80 unsigned char priority
; /* class priority */
81 unsigned char priority2
; /* priority to be used after overlimit */
82 unsigned char ewma_log
; /* time constant for idle time calculation */
83 unsigned char ovl_strategy
;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle
; /* Class parameters: see below. */
95 struct qdisc_rate_table
*R_tab
;
97 /* Overlimit strategy parameters */
98 void (*overlimit
)(struct cbq_class
*cl
);
99 psched_tdiff_t penalty
;
101 /* General scheduler (WRR) parameters */
103 long quantum
; /* Allotment per WRR round */
104 long weight
; /* Relative allotment: see below */
106 struct Qdisc
*qdisc
; /* Ptr to CBQ discipline */
107 struct cbq_class
*split
; /* Ptr to split node */
108 struct cbq_class
*share
; /* Ptr to LS parent in the class tree */
109 struct cbq_class
*tparent
; /* Ptr to tree parent in the class tree */
110 struct cbq_class
*borrow
; /* NULL if class is bandwidth limited;
112 struct cbq_class
*sibling
; /* Sibling chain */
113 struct cbq_class
*children
; /* Pointer to children chain */
115 struct Qdisc
*q
; /* Elementary queueing discipline */
119 unsigned char cpriority
; /* Effective priority */
120 unsigned char delayed
;
121 unsigned char level
; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last
; /* Last end of service */
127 psched_time_t undertime
;
129 long deficit
; /* Saved deficit for WRR */
130 psched_time_t penalized
;
131 struct gnet_stats_basic_packed bstats
;
132 struct gnet_stats_queue qstats
;
133 struct gnet_stats_rate_est64 rate_est
;
134 struct tc_cbq_xstats xstats
;
136 struct tcf_proto
*filter_list
;
141 struct cbq_class
*defaults
[TC_PRIO_MAX
+ 1];
144 struct cbq_sched_data
{
145 struct Qdisc_class_hash clhash
; /* Hash table of all classes */
146 int nclasses
[TC_CBQ_MAXPRIO
+ 1];
147 unsigned int quanta
[TC_CBQ_MAXPRIO
+ 1];
149 struct cbq_class link
;
151 unsigned int activemask
;
152 struct cbq_class
*active
[TC_CBQ_MAXPRIO
+ 1]; /* List of all classes
155 #ifdef CONFIG_NET_CLS_ACT
156 struct cbq_class
*rx_class
;
158 struct cbq_class
*tx_class
;
159 struct cbq_class
*tx_borrowed
;
161 psched_time_t now
; /* Cached timestamp */
164 struct hrtimer delay_timer
;
165 struct qdisc_watchdog watchdog
; /* Watchdog timer,
169 psched_tdiff_t wd_expires
;
175 #define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
177 static inline struct cbq_class
*
178 cbq_class_lookup(struct cbq_sched_data
*q
, u32 classid
)
180 struct Qdisc_class_common
*clc
;
182 clc
= qdisc_class_find(&q
->clhash
, classid
);
185 return container_of(clc
, struct cbq_class
, common
);
188 #ifdef CONFIG_NET_CLS_ACT
190 static struct cbq_class
*
191 cbq_reclassify(struct sk_buff
*skb
, struct cbq_class
*this)
193 struct cbq_class
*cl
;
195 for (cl
= this->tparent
; cl
; cl
= cl
->tparent
) {
196 struct cbq_class
*new = cl
->defaults
[TC_PRIO_BESTEFFORT
];
198 if (new != NULL
&& new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 * it allows us to combine link sharing and priority scheduling
210 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
211 * so that it resolves to split nodes. Then packets are classified
212 * by logical priority, or a more specific classifier may be attached
216 static struct cbq_class
*
217 cbq_classify(struct sk_buff
*skb
, struct Qdisc
*sch
, int *qerr
)
219 struct cbq_sched_data
*q
= qdisc_priv(sch
);
220 struct cbq_class
*head
= &q
->link
;
221 struct cbq_class
**defmap
;
222 struct cbq_class
*cl
= NULL
;
223 u32 prio
= skb
->priority
;
224 struct tcf_result res
;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio
^ sch
->handle
) == 0 &&
230 (cl
= cbq_class_lookup(q
, prio
)) != NULL
)
233 *qerr
= NET_XMIT_SUCCESS
| __NET_XMIT_BYPASS
;
236 defmap
= head
->defaults
;
239 * Step 2+n. Apply classifier.
241 if (!head
->filter_list
||
242 (result
= tc_classify_compat(skb
, head
->filter_list
, &res
)) < 0)
245 cl
= (void *)res
.class;
247 if (TC_H_MAJ(res
.classid
))
248 cl
= cbq_class_lookup(q
, res
.classid
);
249 else if ((cl
= defmap
[res
.classid
& TC_PRIO_MAX
]) == NULL
)
250 cl
= defmap
[TC_PRIO_BESTEFFORT
];
255 if (cl
->level
>= head
->level
)
257 #ifdef CONFIG_NET_CLS_ACT
261 *qerr
= NET_XMIT_SUCCESS
| __NET_XMIT_STOLEN
;
264 case TC_ACT_RECLASSIFY
:
265 return cbq_reclassify(skb
, cl
);
272 * Step 3+n. If classifier selected a link sharing class,
273 * apply agency specific classifier.
274 * Repeat this procedure until we hit a leaf node.
283 * Step 4. No success...
285 if (TC_H_MAJ(prio
) == 0 &&
286 !(cl
= head
->defaults
[prio
& TC_PRIO_MAX
]) &&
287 !(cl
= head
->defaults
[TC_PRIO_BESTEFFORT
]))
294 * A packet has just been enqueued on the empty class.
295 * cbq_activate_class adds it to the tail of active class list
296 * of its priority band.
299 static inline void cbq_activate_class(struct cbq_class
*cl
)
301 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
302 int prio
= cl
->cpriority
;
303 struct cbq_class
*cl_tail
;
305 cl_tail
= q
->active
[prio
];
306 q
->active
[prio
] = cl
;
308 if (cl_tail
!= NULL
) {
309 cl
->next_alive
= cl_tail
->next_alive
;
310 cl_tail
->next_alive
= cl
;
313 q
->activemask
|= (1<<prio
);
318 * Unlink class from active chain.
319 * Note that this same procedure is done directly in cbq_dequeue*
320 * during round-robin procedure.
323 static void cbq_deactivate_class(struct cbq_class
*this)
325 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
326 int prio
= this->cpriority
;
327 struct cbq_class
*cl
;
328 struct cbq_class
*cl_prev
= q
->active
[prio
];
331 cl
= cl_prev
->next_alive
;
333 cl_prev
->next_alive
= cl
->next_alive
;
334 cl
->next_alive
= NULL
;
336 if (cl
== q
->active
[prio
]) {
337 q
->active
[prio
] = cl_prev
;
338 if (cl
== q
->active
[prio
]) {
339 q
->active
[prio
] = NULL
;
340 q
->activemask
&= ~(1<<prio
);
346 } while ((cl_prev
= cl
) != q
->active
[prio
]);
350 cbq_mark_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
352 int toplevel
= q
->toplevel
;
354 if (toplevel
> cl
->level
&& !(qdisc_is_throttled(cl
->q
))) {
355 psched_time_t now
= psched_get_time();
358 if (cl
->undertime
< now
) {
359 q
->toplevel
= cl
->level
;
362 } while ((cl
= cl
->borrow
) != NULL
&& toplevel
> cl
->level
);
367 cbq_enqueue(struct sk_buff
*skb
, struct Qdisc
*sch
)
369 struct cbq_sched_data
*q
= qdisc_priv(sch
);
370 int uninitialized_var(ret
);
371 struct cbq_class
*cl
= cbq_classify(skb
, sch
, &ret
);
373 #ifdef CONFIG_NET_CLS_ACT
377 if (ret
& __NET_XMIT_BYPASS
)
383 #ifdef CONFIG_NET_CLS_ACT
384 cl
->q
->__parent
= sch
;
386 ret
= qdisc_enqueue(skb
, cl
->q
);
387 if (ret
== NET_XMIT_SUCCESS
) {
389 cbq_mark_toplevel(q
, cl
);
391 cbq_activate_class(cl
);
395 if (net_xmit_drop_count(ret
)) {
397 cbq_mark_toplevel(q
, cl
);
403 /* Overlimit actions */
405 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
407 static void cbq_ovl_classic(struct cbq_class
*cl
)
409 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
410 psched_tdiff_t delay
= cl
->undertime
- q
->now
;
413 delay
+= cl
->offtime
;
416 * Class goes to sleep, so that it will have no
417 * chance to work avgidle. Let's forgive it 8)
419 * BTW cbq-2.0 has a crap in this
420 * place, apparently they forgot to shift it by cl->ewma_log.
423 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
424 if (cl
->avgidle
< cl
->minidle
)
425 cl
->avgidle
= cl
->minidle
;
428 cl
->undertime
= q
->now
+ delay
;
430 cl
->xstats
.overactions
++;
433 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
434 q
->wd_expires
= delay
;
436 /* Dirty work! We must schedule wakeups based on
437 * real available rate, rather than leaf rate,
438 * which may be tiny (even zero).
440 if (q
->toplevel
== TC_CBQ_MAXLEVEL
) {
442 psched_tdiff_t base_delay
= q
->wd_expires
;
444 for (b
= cl
->borrow
; b
; b
= b
->borrow
) {
445 delay
= b
->undertime
- q
->now
;
446 if (delay
< base_delay
) {
453 q
->wd_expires
= base_delay
;
457 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
461 static void cbq_ovl_rclassic(struct cbq_class
*cl
)
463 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
464 struct cbq_class
*this = cl
;
467 if (cl
->level
> q
->toplevel
) {
471 } while ((cl
= cl
->borrow
) != NULL
);
478 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
480 static void cbq_ovl_delay(struct cbq_class
*cl
)
482 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
483 psched_tdiff_t delay
= cl
->undertime
- q
->now
;
485 if (test_bit(__QDISC_STATE_DEACTIVATED
,
486 &qdisc_root_sleeping(cl
->qdisc
)->state
))
490 psched_time_t sched
= q
->now
;
493 delay
+= cl
->offtime
;
495 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
496 if (cl
->avgidle
< cl
->minidle
)
497 cl
->avgidle
= cl
->minidle
;
498 cl
->undertime
= q
->now
+ delay
;
501 sched
+= delay
+ cl
->penalty
;
502 cl
->penalized
= sched
;
503 cl
->cpriority
= TC_CBQ_MAXPRIO
;
504 q
->pmask
|= (1<<TC_CBQ_MAXPRIO
);
506 expires
= ns_to_ktime(PSCHED_TICKS2NS(sched
));
507 if (hrtimer_try_to_cancel(&q
->delay_timer
) &&
508 ktime_to_ns(ktime_sub(
509 hrtimer_get_expires(&q
->delay_timer
),
511 hrtimer_set_expires(&q
->delay_timer
, expires
);
512 hrtimer_restart(&q
->delay_timer
);
514 cl
->xstats
.overactions
++;
519 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
520 q
->wd_expires
= delay
;
523 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
525 static void cbq_ovl_lowprio(struct cbq_class
*cl
)
527 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
529 cl
->penalized
= q
->now
+ cl
->penalty
;
531 if (cl
->cpriority
!= cl
->priority2
) {
532 cl
->cpriority
= cl
->priority2
;
533 q
->pmask
|= (1<<cl
->cpriority
);
534 cl
->xstats
.overactions
++;
539 /* TC_CBQ_OVL_DROP: penalize class by dropping */
541 static void cbq_ovl_drop(struct cbq_class
*cl
)
543 if (cl
->q
->ops
->drop
)
544 if (cl
->q
->ops
->drop(cl
->q
))
546 cl
->xstats
.overactions
++;
550 static psched_tdiff_t
cbq_undelay_prio(struct cbq_sched_data
*q
, int prio
,
553 struct cbq_class
*cl
;
554 struct cbq_class
*cl_prev
= q
->active
[prio
];
555 psched_time_t sched
= now
;
561 cl
= cl_prev
->next_alive
;
562 if (now
- cl
->penalized
> 0) {
563 cl_prev
->next_alive
= cl
->next_alive
;
564 cl
->next_alive
= NULL
;
565 cl
->cpriority
= cl
->priority
;
567 cbq_activate_class(cl
);
569 if (cl
== q
->active
[prio
]) {
570 q
->active
[prio
] = cl_prev
;
571 if (cl
== q
->active
[prio
]) {
572 q
->active
[prio
] = NULL
;
577 cl
= cl_prev
->next_alive
;
578 } else if (sched
- cl
->penalized
> 0)
579 sched
= cl
->penalized
;
580 } while ((cl_prev
= cl
) != q
->active
[prio
]);
585 static enum hrtimer_restart
cbq_undelay(struct hrtimer
*timer
)
587 struct cbq_sched_data
*q
= container_of(timer
, struct cbq_sched_data
,
589 struct Qdisc
*sch
= q
->watchdog
.qdisc
;
591 psched_tdiff_t delay
= 0;
594 now
= psched_get_time();
600 int prio
= ffz(~pmask
);
605 tmp
= cbq_undelay_prio(q
, prio
, now
);
608 if (tmp
< delay
|| delay
== 0)
616 time
= ktime_set(0, 0);
617 time
= ktime_add_ns(time
, PSCHED_TICKS2NS(now
+ delay
));
618 hrtimer_start(&q
->delay_timer
, time
, HRTIMER_MODE_ABS
);
621 qdisc_unthrottled(sch
);
622 __netif_schedule(qdisc_root(sch
));
623 return HRTIMER_NORESTART
;
626 #ifdef CONFIG_NET_CLS_ACT
627 static int cbq_reshape_fail(struct sk_buff
*skb
, struct Qdisc
*child
)
629 struct Qdisc
*sch
= child
->__parent
;
630 struct cbq_sched_data
*q
= qdisc_priv(sch
);
631 struct cbq_class
*cl
= q
->rx_class
;
635 if (cl
&& (cl
= cbq_reclassify(skb
, cl
)) != NULL
) {
638 cbq_mark_toplevel(q
, cl
);
641 cl
->q
->__parent
= sch
;
643 ret
= qdisc_enqueue(skb
, cl
->q
);
644 if (ret
== NET_XMIT_SUCCESS
) {
647 cbq_activate_class(cl
);
650 if (net_xmit_drop_count(ret
))
661 * It is mission critical procedure.
663 * We "regenerate" toplevel cutoff, if transmitting class
664 * has backlog and it is not regulated. It is not part of
665 * original CBQ description, but looks more reasonable.
666 * Probably, it is wrong. This question needs further investigation.
670 cbq_update_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
,
671 struct cbq_class
*borrowed
)
673 if (cl
&& q
->toplevel
>= borrowed
->level
) {
674 if (cl
->q
->q
.qlen
> 1) {
676 if (borrowed
->undertime
== PSCHED_PASTPERFECT
) {
677 q
->toplevel
= borrowed
->level
;
680 } while ((borrowed
= borrowed
->borrow
) != NULL
);
683 /* It is not necessary now. Uncommenting it
684 will save CPU cycles, but decrease fairness.
686 q
->toplevel
= TC_CBQ_MAXLEVEL
;
692 cbq_update(struct cbq_sched_data
*q
)
694 struct cbq_class
*this = q
->tx_class
;
695 struct cbq_class
*cl
= this;
700 /* Time integrator. We calculate EOS time
701 * by adding expected packet transmission time.
703 now
= q
->now
+ L2T(&q
->link
, len
);
705 for ( ; cl
; cl
= cl
->share
) {
706 long avgidle
= cl
->avgidle
;
709 cl
->bstats
.packets
++;
710 cl
->bstats
.bytes
+= len
;
713 * (now - last) is total time between packet right edges.
714 * (last_pktlen/rate) is "virtual" busy time, so that
716 * idle = (now - last) - last_pktlen/rate
719 idle
= now
- cl
->last
;
720 if ((unsigned long)idle
> 128*1024*1024) {
721 avgidle
= cl
->maxidle
;
723 idle
-= L2T(cl
, len
);
725 /* true_avgidle := (1-W)*true_avgidle + W*idle,
726 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
727 * cl->avgidle == true_avgidle/W,
730 avgidle
+= idle
- (avgidle
>>cl
->ewma_log
);
734 /* Overlimit or at-limit */
736 if (avgidle
< cl
->minidle
)
737 avgidle
= cl
->minidle
;
739 cl
->avgidle
= avgidle
;
741 /* Calculate expected time, when this class
742 * will be allowed to send.
743 * It will occur, when:
744 * (1-W)*true_avgidle + W*delay = 0, i.e.
745 * idle = (1/W - 1)*(-true_avgidle)
747 * idle = (1 - W)*(-cl->avgidle);
749 idle
= (-avgidle
) - ((-avgidle
) >> cl
->ewma_log
);
753 * To maintain the rate allocated to the class,
754 * we add to undertime virtual clock,
755 * necessary to complete transmitted packet.
756 * (len/phys_bandwidth has been already passed
757 * to the moment of cbq_update)
760 idle
-= L2T(&q
->link
, len
);
761 idle
+= L2T(cl
, len
);
763 cl
->undertime
= now
+ idle
;
767 cl
->undertime
= PSCHED_PASTPERFECT
;
768 if (avgidle
> cl
->maxidle
)
769 cl
->avgidle
= cl
->maxidle
;
771 cl
->avgidle
= avgidle
;
773 if ((s64
)(now
- cl
->last
) > 0)
777 cbq_update_toplevel(q
, this, q
->tx_borrowed
);
780 static inline struct cbq_class
*
781 cbq_under_limit(struct cbq_class
*cl
)
783 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
784 struct cbq_class
*this_cl
= cl
;
786 if (cl
->tparent
== NULL
)
789 if (cl
->undertime
== PSCHED_PASTPERFECT
|| q
->now
>= cl
->undertime
) {
795 /* It is very suspicious place. Now overlimit
796 * action is generated for not bounded classes
797 * only if link is completely congested.
798 * Though it is in agree with ancestor-only paradigm,
799 * it looks very stupid. Particularly,
800 * it means that this chunk of code will either
801 * never be called or result in strong amplification
802 * of burstiness. Dangerous, silly, and, however,
803 * no another solution exists.
807 this_cl
->qstats
.overlimits
++;
808 this_cl
->overlimit(this_cl
);
811 if (cl
->level
> q
->toplevel
)
813 } while (cl
->undertime
!= PSCHED_PASTPERFECT
&& q
->now
< cl
->undertime
);
819 static inline struct sk_buff
*
820 cbq_dequeue_prio(struct Qdisc
*sch
, int prio
)
822 struct cbq_sched_data
*q
= qdisc_priv(sch
);
823 struct cbq_class
*cl_tail
, *cl_prev
, *cl
;
827 cl_tail
= cl_prev
= q
->active
[prio
];
828 cl
= cl_prev
->next_alive
;
835 struct cbq_class
*borrow
= cl
;
838 (borrow
= cbq_under_limit(cl
)) == NULL
)
841 if (cl
->deficit
<= 0) {
842 /* Class exhausted its allotment per
843 * this round. Switch to the next one.
846 cl
->deficit
+= cl
->quantum
;
850 skb
= cl
->q
->dequeue(cl
->q
);
852 /* Class did not give us any skb :-(
853 * It could occur even if cl->q->q.qlen != 0
854 * f.e. if cl->q == "tbf"
859 cl
->deficit
-= qdisc_pkt_len(skb
);
861 q
->tx_borrowed
= borrow
;
863 #ifndef CBQ_XSTATS_BORROWS_BYTES
864 borrow
->xstats
.borrows
++;
865 cl
->xstats
.borrows
++;
867 borrow
->xstats
.borrows
+= qdisc_pkt_len(skb
);
868 cl
->xstats
.borrows
+= qdisc_pkt_len(skb
);
871 q
->tx_len
= qdisc_pkt_len(skb
);
873 if (cl
->deficit
<= 0) {
874 q
->active
[prio
] = cl
;
876 cl
->deficit
+= cl
->quantum
;
881 if (cl
->q
->q
.qlen
== 0 || prio
!= cl
->cpriority
) {
882 /* Class is empty or penalized.
883 * Unlink it from active chain.
885 cl_prev
->next_alive
= cl
->next_alive
;
886 cl
->next_alive
= NULL
;
888 /* Did cl_tail point to it? */
893 /* Was it the last class in this band? */
896 q
->active
[prio
] = NULL
;
897 q
->activemask
&= ~(1<<prio
);
899 cbq_activate_class(cl
);
903 q
->active
[prio
] = cl_tail
;
906 cbq_activate_class(cl
);
914 } while (cl_prev
!= cl_tail
);
917 q
->active
[prio
] = cl_prev
;
922 static inline struct sk_buff
*
923 cbq_dequeue_1(struct Qdisc
*sch
)
925 struct cbq_sched_data
*q
= qdisc_priv(sch
);
927 unsigned int activemask
;
929 activemask
= q
->activemask
& 0xFF;
931 int prio
= ffz(~activemask
);
932 activemask
&= ~(1<<prio
);
933 skb
= cbq_dequeue_prio(sch
, prio
);
940 static struct sk_buff
*
941 cbq_dequeue(struct Qdisc
*sch
)
944 struct cbq_sched_data
*q
= qdisc_priv(sch
);
947 now
= psched_get_time();
957 skb
= cbq_dequeue_1(sch
);
959 qdisc_bstats_update(sch
, skb
);
961 qdisc_unthrottled(sch
);
965 /* All the classes are overlimit.
967 * It is possible, if:
969 * 1. Scheduler is empty.
970 * 2. Toplevel cutoff inhibited borrowing.
971 * 3. Root class is overlimit.
973 * Reset 2d and 3d conditions and retry.
975 * Note, that NS and cbq-2.0 are buggy, peeking
976 * an arbitrary class is appropriate for ancestor-only
977 * sharing, but not for toplevel algorithm.
979 * Our version is better, but slower, because it requires
980 * two passes, but it is unavoidable with top-level sharing.
983 if (q
->toplevel
== TC_CBQ_MAXLEVEL
&&
984 q
->link
.undertime
== PSCHED_PASTPERFECT
)
987 q
->toplevel
= TC_CBQ_MAXLEVEL
;
988 q
->link
.undertime
= PSCHED_PASTPERFECT
;
991 /* No packets in scheduler or nobody wants to give them to us :-(
992 * Sigh... start watchdog timer in the last case.
996 sch
->qstats
.overlimits
++;
998 qdisc_watchdog_schedule(&q
->watchdog
,
999 now
+ q
->wd_expires
);
1004 /* CBQ class maintenance routines */
1006 static void cbq_adjust_levels(struct cbq_class
*this)
1013 struct cbq_class
*cl
;
1015 cl
= this->children
;
1018 if (cl
->level
> level
)
1020 } while ((cl
= cl
->sibling
) != this->children
);
1022 this->level
= level
+ 1;
1023 } while ((this = this->tparent
) != NULL
);
1026 static void cbq_normalize_quanta(struct cbq_sched_data
*q
, int prio
)
1028 struct cbq_class
*cl
;
1031 if (q
->quanta
[prio
] == 0)
1034 for (h
= 0; h
< q
->clhash
.hashsize
; h
++) {
1035 hlist_for_each_entry(cl
, &q
->clhash
.hash
[h
], common
.hnode
) {
1036 /* BUGGGG... Beware! This expression suffer of
1037 * arithmetic overflows!
1039 if (cl
->priority
== prio
) {
1040 cl
->quantum
= (cl
->weight
*cl
->allot
*q
->nclasses
[prio
])/
1043 if (cl
->quantum
<= 0 ||
1044 cl
->quantum
> 32*qdisc_dev(cl
->qdisc
)->mtu
) {
1045 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1046 cl
->common
.classid
, cl
->quantum
);
1047 cl
->quantum
= qdisc_dev(cl
->qdisc
)->mtu
/2 + 1;
1053 static void cbq_sync_defmap(struct cbq_class
*cl
)
1055 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1056 struct cbq_class
*split
= cl
->split
;
1063 for (i
= 0; i
<= TC_PRIO_MAX
; i
++) {
1064 if (split
->defaults
[i
] == cl
&& !(cl
->defmap
& (1<<i
)))
1065 split
->defaults
[i
] = NULL
;
1068 for (i
= 0; i
<= TC_PRIO_MAX
; i
++) {
1069 int level
= split
->level
;
1071 if (split
->defaults
[i
])
1074 for (h
= 0; h
< q
->clhash
.hashsize
; h
++) {
1075 struct cbq_class
*c
;
1077 hlist_for_each_entry(c
, &q
->clhash
.hash
[h
],
1079 if (c
->split
== split
&& c
->level
< level
&&
1080 c
->defmap
& (1<<i
)) {
1081 split
->defaults
[i
] = c
;
1089 static void cbq_change_defmap(struct cbq_class
*cl
, u32 splitid
, u32 def
, u32 mask
)
1091 struct cbq_class
*split
= NULL
;
1097 splitid
= split
->common
.classid
;
1100 if (split
== NULL
|| split
->common
.classid
!= splitid
) {
1101 for (split
= cl
->tparent
; split
; split
= split
->tparent
)
1102 if (split
->common
.classid
== splitid
)
1109 if (cl
->split
!= split
) {
1111 cbq_sync_defmap(cl
);
1113 cl
->defmap
= def
& mask
;
1115 cl
->defmap
= (cl
->defmap
& ~mask
) | (def
& mask
);
1117 cbq_sync_defmap(cl
);
1120 static void cbq_unlink_class(struct cbq_class
*this)
1122 struct cbq_class
*cl
, **clp
;
1123 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1125 qdisc_class_hash_remove(&q
->clhash
, &this->common
);
1127 if (this->tparent
) {
1128 clp
= &this->sibling
;
1136 } while ((cl
= *clp
) != this->sibling
);
1138 if (this->tparent
->children
== this) {
1139 this->tparent
->children
= this->sibling
;
1140 if (this->sibling
== this)
1141 this->tparent
->children
= NULL
;
1144 WARN_ON(this->sibling
!= this);
1148 static void cbq_link_class(struct cbq_class
*this)
1150 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1151 struct cbq_class
*parent
= this->tparent
;
1153 this->sibling
= this;
1154 qdisc_class_hash_insert(&q
->clhash
, &this->common
);
1159 if (parent
->children
== NULL
) {
1160 parent
->children
= this;
1162 this->sibling
= parent
->children
->sibling
;
1163 parent
->children
->sibling
= this;
1167 static unsigned int cbq_drop(struct Qdisc
*sch
)
1169 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1170 struct cbq_class
*cl
, *cl_head
;
1174 for (prio
= TC_CBQ_MAXPRIO
; prio
>= 0; prio
--) {
1175 cl_head
= q
->active
[prio
];
1181 if (cl
->q
->ops
->drop
&& (len
= cl
->q
->ops
->drop(cl
->q
))) {
1184 cbq_deactivate_class(cl
);
1187 } while ((cl
= cl
->next_alive
) != cl_head
);
1193 cbq_reset(struct Qdisc
*sch
)
1195 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1196 struct cbq_class
*cl
;
1203 q
->tx_borrowed
= NULL
;
1204 qdisc_watchdog_cancel(&q
->watchdog
);
1205 hrtimer_cancel(&q
->delay_timer
);
1206 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1207 q
->now
= psched_get_time();
1209 for (prio
= 0; prio
<= TC_CBQ_MAXPRIO
; prio
++)
1210 q
->active
[prio
] = NULL
;
1212 for (h
= 0; h
< q
->clhash
.hashsize
; h
++) {
1213 hlist_for_each_entry(cl
, &q
->clhash
.hash
[h
], common
.hnode
) {
1216 cl
->next_alive
= NULL
;
1217 cl
->undertime
= PSCHED_PASTPERFECT
;
1218 cl
->avgidle
= cl
->maxidle
;
1219 cl
->deficit
= cl
->quantum
;
1220 cl
->cpriority
= cl
->priority
;
1227 static int cbq_set_lss(struct cbq_class
*cl
, struct tc_cbq_lssopt
*lss
)
1229 if (lss
->change
& TCF_CBQ_LSS_FLAGS
) {
1230 cl
->share
= (lss
->flags
& TCF_CBQ_LSS_ISOLATED
) ? NULL
: cl
->tparent
;
1231 cl
->borrow
= (lss
->flags
& TCF_CBQ_LSS_BOUNDED
) ? NULL
: cl
->tparent
;
1233 if (lss
->change
& TCF_CBQ_LSS_EWMA
)
1234 cl
->ewma_log
= lss
->ewma_log
;
1235 if (lss
->change
& TCF_CBQ_LSS_AVPKT
)
1236 cl
->avpkt
= lss
->avpkt
;
1237 if (lss
->change
& TCF_CBQ_LSS_MINIDLE
)
1238 cl
->minidle
= -(long)lss
->minidle
;
1239 if (lss
->change
& TCF_CBQ_LSS_MAXIDLE
) {
1240 cl
->maxidle
= lss
->maxidle
;
1241 cl
->avgidle
= lss
->maxidle
;
1243 if (lss
->change
& TCF_CBQ_LSS_OFFTIME
)
1244 cl
->offtime
= lss
->offtime
;
1248 static void cbq_rmprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1250 q
->nclasses
[cl
->priority
]--;
1251 q
->quanta
[cl
->priority
] -= cl
->weight
;
1252 cbq_normalize_quanta(q
, cl
->priority
);
1255 static void cbq_addprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1257 q
->nclasses
[cl
->priority
]++;
1258 q
->quanta
[cl
->priority
] += cl
->weight
;
1259 cbq_normalize_quanta(q
, cl
->priority
);
1262 static int cbq_set_wrr(struct cbq_class
*cl
, struct tc_cbq_wrropt
*wrr
)
1264 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1267 cl
->allot
= wrr
->allot
;
1269 cl
->weight
= wrr
->weight
;
1270 if (wrr
->priority
) {
1271 cl
->priority
= wrr
->priority
- 1;
1272 cl
->cpriority
= cl
->priority
;
1273 if (cl
->priority
>= cl
->priority2
)
1274 cl
->priority2
= TC_CBQ_MAXPRIO
- 1;
1281 static int cbq_set_overlimit(struct cbq_class
*cl
, struct tc_cbq_ovl
*ovl
)
1283 switch (ovl
->strategy
) {
1284 case TC_CBQ_OVL_CLASSIC
:
1285 cl
->overlimit
= cbq_ovl_classic
;
1287 case TC_CBQ_OVL_DELAY
:
1288 cl
->overlimit
= cbq_ovl_delay
;
1290 case TC_CBQ_OVL_LOWPRIO
:
1291 if (ovl
->priority2
- 1 >= TC_CBQ_MAXPRIO
||
1292 ovl
->priority2
- 1 <= cl
->priority
)
1294 cl
->priority2
= ovl
->priority2
- 1;
1295 cl
->overlimit
= cbq_ovl_lowprio
;
1297 case TC_CBQ_OVL_DROP
:
1298 cl
->overlimit
= cbq_ovl_drop
;
1300 case TC_CBQ_OVL_RCLASSIC
:
1301 cl
->overlimit
= cbq_ovl_rclassic
;
1306 cl
->penalty
= ovl
->penalty
;
1310 #ifdef CONFIG_NET_CLS_ACT
1311 static int cbq_set_police(struct cbq_class
*cl
, struct tc_cbq_police
*p
)
1313 cl
->police
= p
->police
;
1315 if (cl
->q
->handle
) {
1316 if (p
->police
== TC_POLICE_RECLASSIFY
)
1317 cl
->q
->reshape_fail
= cbq_reshape_fail
;
1319 cl
->q
->reshape_fail
= NULL
;
1325 static int cbq_set_fopt(struct cbq_class
*cl
, struct tc_cbq_fopt
*fopt
)
1327 cbq_change_defmap(cl
, fopt
->split
, fopt
->defmap
, fopt
->defchange
);
1331 static const struct nla_policy cbq_policy
[TCA_CBQ_MAX
+ 1] = {
1332 [TCA_CBQ_LSSOPT
] = { .len
= sizeof(struct tc_cbq_lssopt
) },
1333 [TCA_CBQ_WRROPT
] = { .len
= sizeof(struct tc_cbq_wrropt
) },
1334 [TCA_CBQ_FOPT
] = { .len
= sizeof(struct tc_cbq_fopt
) },
1335 [TCA_CBQ_OVL_STRATEGY
] = { .len
= sizeof(struct tc_cbq_ovl
) },
1336 [TCA_CBQ_RATE
] = { .len
= sizeof(struct tc_ratespec
) },
1337 [TCA_CBQ_RTAB
] = { .type
= NLA_BINARY
, .len
= TC_RTAB_SIZE
},
1338 [TCA_CBQ_POLICE
] = { .len
= sizeof(struct tc_cbq_police
) },
1341 static int cbq_init(struct Qdisc
*sch
, struct nlattr
*opt
)
1343 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1344 struct nlattr
*tb
[TCA_CBQ_MAX
+ 1];
1345 struct tc_ratespec
*r
;
1348 err
= nla_parse_nested(tb
, TCA_CBQ_MAX
, opt
, cbq_policy
);
1352 if (tb
[TCA_CBQ_RTAB
] == NULL
|| tb
[TCA_CBQ_RATE
] == NULL
)
1355 r
= nla_data(tb
[TCA_CBQ_RATE
]);
1357 if ((q
->link
.R_tab
= qdisc_get_rtab(r
, tb
[TCA_CBQ_RTAB
])) == NULL
)
1360 err
= qdisc_class_hash_init(&q
->clhash
);
1365 q
->link
.sibling
= &q
->link
;
1366 q
->link
.common
.classid
= sch
->handle
;
1367 q
->link
.qdisc
= sch
;
1368 q
->link
.q
= qdisc_create_dflt(sch
->dev_queue
, &pfifo_qdisc_ops
,
1371 q
->link
.q
= &noop_qdisc
;
1373 q
->link
.priority
= TC_CBQ_MAXPRIO
- 1;
1374 q
->link
.priority2
= TC_CBQ_MAXPRIO
- 1;
1375 q
->link
.cpriority
= TC_CBQ_MAXPRIO
- 1;
1376 q
->link
.ovl_strategy
= TC_CBQ_OVL_CLASSIC
;
1377 q
->link
.overlimit
= cbq_ovl_classic
;
1378 q
->link
.allot
= psched_mtu(qdisc_dev(sch
));
1379 q
->link
.quantum
= q
->link
.allot
;
1380 q
->link
.weight
= q
->link
.R_tab
->rate
.rate
;
1382 q
->link
.ewma_log
= TC_CBQ_DEF_EWMA
;
1383 q
->link
.avpkt
= q
->link
.allot
/2;
1384 q
->link
.minidle
= -0x7FFFFFFF;
1386 qdisc_watchdog_init(&q
->watchdog
, sch
);
1387 hrtimer_init(&q
->delay_timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS
);
1388 q
->delay_timer
.function
= cbq_undelay
;
1389 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1390 q
->now
= psched_get_time();
1392 cbq_link_class(&q
->link
);
1394 if (tb
[TCA_CBQ_LSSOPT
])
1395 cbq_set_lss(&q
->link
, nla_data(tb
[TCA_CBQ_LSSOPT
]));
1397 cbq_addprio(q
, &q
->link
);
1401 qdisc_put_rtab(q
->link
.R_tab
);
1405 static int cbq_dump_rate(struct sk_buff
*skb
, struct cbq_class
*cl
)
1407 unsigned char *b
= skb_tail_pointer(skb
);
1409 if (nla_put(skb
, TCA_CBQ_RATE
, sizeof(cl
->R_tab
->rate
), &cl
->R_tab
->rate
))
1410 goto nla_put_failure
;
1418 static int cbq_dump_lss(struct sk_buff
*skb
, struct cbq_class
*cl
)
1420 unsigned char *b
= skb_tail_pointer(skb
);
1421 struct tc_cbq_lssopt opt
;
1424 if (cl
->borrow
== NULL
)
1425 opt
.flags
|= TCF_CBQ_LSS_BOUNDED
;
1426 if (cl
->share
== NULL
)
1427 opt
.flags
|= TCF_CBQ_LSS_ISOLATED
;
1428 opt
.ewma_log
= cl
->ewma_log
;
1429 opt
.level
= cl
->level
;
1430 opt
.avpkt
= cl
->avpkt
;
1431 opt
.maxidle
= cl
->maxidle
;
1432 opt
.minidle
= (u32
)(-cl
->minidle
);
1433 opt
.offtime
= cl
->offtime
;
1435 if (nla_put(skb
, TCA_CBQ_LSSOPT
, sizeof(opt
), &opt
))
1436 goto nla_put_failure
;
1444 static int cbq_dump_wrr(struct sk_buff
*skb
, struct cbq_class
*cl
)
1446 unsigned char *b
= skb_tail_pointer(skb
);
1447 struct tc_cbq_wrropt opt
;
1449 memset(&opt
, 0, sizeof(opt
));
1451 opt
.allot
= cl
->allot
;
1452 opt
.priority
= cl
->priority
+ 1;
1453 opt
.cpriority
= cl
->cpriority
+ 1;
1454 opt
.weight
= cl
->weight
;
1455 if (nla_put(skb
, TCA_CBQ_WRROPT
, sizeof(opt
), &opt
))
1456 goto nla_put_failure
;
1464 static int cbq_dump_ovl(struct sk_buff
*skb
, struct cbq_class
*cl
)
1466 unsigned char *b
= skb_tail_pointer(skb
);
1467 struct tc_cbq_ovl opt
;
1469 opt
.strategy
= cl
->ovl_strategy
;
1470 opt
.priority2
= cl
->priority2
+ 1;
1472 opt
.penalty
= cl
->penalty
;
1473 if (nla_put(skb
, TCA_CBQ_OVL_STRATEGY
, sizeof(opt
), &opt
))
1474 goto nla_put_failure
;
1482 static int cbq_dump_fopt(struct sk_buff
*skb
, struct cbq_class
*cl
)
1484 unsigned char *b
= skb_tail_pointer(skb
);
1485 struct tc_cbq_fopt opt
;
1487 if (cl
->split
|| cl
->defmap
) {
1488 opt
.split
= cl
->split
? cl
->split
->common
.classid
: 0;
1489 opt
.defmap
= cl
->defmap
;
1491 if (nla_put(skb
, TCA_CBQ_FOPT
, sizeof(opt
), &opt
))
1492 goto nla_put_failure
;
#ifdef CONFIG_NET_CLS_ACT
/* Emit the policing mode as a TCA_CBQ_POLICE attribute, but only when
 * a police action is configured. tc_cbq_police carries reserved fields;
 * zero the struct before filling it so they do not leak stack contents.
 */
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		memset(&opt, 0, sizeof(opt));
		opt.police = cl->police;
		if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
			goto nla_put_failure;
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
/* Dump every per-class attribute group for @cl into @skb.
 * Returns 0 on success, -1 if any sub-dump failed (each sub-dump has
 * already trimmed its partial attribute from the skb on failure).
 */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
1536 static int cbq_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
1538 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1539 struct nlattr
*nest
;
1541 nest
= nla_nest_start(skb
, TCA_OPTIONS
);
1543 goto nla_put_failure
;
1544 if (cbq_dump_attr(skb
, &q
->link
) < 0)
1545 goto nla_put_failure
;
1546 return nla_nest_end(skb
, nest
);
1549 nla_nest_cancel(skb
, nest
);
1554 cbq_dump_stats(struct Qdisc
*sch
, struct gnet_dump
*d
)
1556 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1558 q
->link
.xstats
.avgidle
= q
->link
.avgidle
;
1559 return gnet_stats_copy_app(d
, &q
->link
.xstats
, sizeof(q
->link
.xstats
));
1563 cbq_dump_class(struct Qdisc
*sch
, unsigned long arg
,
1564 struct sk_buff
*skb
, struct tcmsg
*tcm
)
1566 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1567 struct nlattr
*nest
;
1570 tcm
->tcm_parent
= cl
->tparent
->common
.classid
;
1572 tcm
->tcm_parent
= TC_H_ROOT
;
1573 tcm
->tcm_handle
= cl
->common
.classid
;
1574 tcm
->tcm_info
= cl
->q
->handle
;
1576 nest
= nla_nest_start(skb
, TCA_OPTIONS
);
1578 goto nla_put_failure
;
1579 if (cbq_dump_attr(skb
, cl
) < 0)
1580 goto nla_put_failure
;
1581 return nla_nest_end(skb
, nest
);
1584 nla_nest_cancel(skb
, nest
);
1589 cbq_dump_class_stats(struct Qdisc
*sch
, unsigned long arg
,
1590 struct gnet_dump
*d
)
1592 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1593 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1595 cl
->qstats
.qlen
= cl
->q
->q
.qlen
;
1596 cl
->xstats
.avgidle
= cl
->avgidle
;
1597 cl
->xstats
.undertime
= 0;
1599 if (cl
->undertime
!= PSCHED_PASTPERFECT
)
1600 cl
->xstats
.undertime
= cl
->undertime
- q
->now
;
1602 if (gnet_stats_copy_basic(d
, &cl
->bstats
) < 0 ||
1603 gnet_stats_copy_rate_est(d
, &cl
->bstats
, &cl
->rate_est
) < 0 ||
1604 gnet_stats_copy_queue(d
, &cl
->qstats
) < 0)
1607 return gnet_stats_copy_app(d
, &cl
->xstats
, sizeof(cl
->xstats
));
1610 static int cbq_graft(struct Qdisc
*sch
, unsigned long arg
, struct Qdisc
*new,
1613 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1616 new = qdisc_create_dflt(sch
->dev_queue
,
1617 &pfifo_qdisc_ops
, cl
->common
.classid
);
1621 #ifdef CONFIG_NET_CLS_ACT
1622 if (cl
->police
== TC_POLICE_RECLASSIFY
)
1623 new->reshape_fail
= cbq_reshape_fail
;
1629 qdisc_tree_decrease_qlen(*old
, (*old
)->q
.qlen
);
1631 sch_tree_unlock(sch
);
1636 static struct Qdisc
*cbq_leaf(struct Qdisc
*sch
, unsigned long arg
)
1638 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1643 static void cbq_qlen_notify(struct Qdisc
*sch
, unsigned long arg
)
1645 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1647 if (cl
->q
->q
.qlen
== 0)
1648 cbq_deactivate_class(cl
);
1651 static unsigned long cbq_get(struct Qdisc
*sch
, u32 classid
)
1653 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1654 struct cbq_class
*cl
= cbq_class_lookup(q
, classid
);
1658 return (unsigned long)cl
;
1663 static void cbq_destroy_class(struct Qdisc
*sch
, struct cbq_class
*cl
)
1665 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1667 WARN_ON(cl
->filters
);
1669 tcf_destroy_chain(&cl
->filter_list
);
1670 qdisc_destroy(cl
->q
);
1671 qdisc_put_rtab(cl
->R_tab
);
1672 gen_kill_estimator(&cl
->bstats
, &cl
->rate_est
);
1677 static void cbq_destroy(struct Qdisc
*sch
)
1679 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1680 struct hlist_node
*next
;
1681 struct cbq_class
*cl
;
1684 #ifdef CONFIG_NET_CLS_ACT
1688 * Filters must be destroyed first because we don't destroy the
1689 * classes from root to leafs which means that filters can still
1690 * be bound to classes which have been destroyed already. --TGR '04
1692 for (h
= 0; h
< q
->clhash
.hashsize
; h
++) {
1693 hlist_for_each_entry(cl
, &q
->clhash
.hash
[h
], common
.hnode
)
1694 tcf_destroy_chain(&cl
->filter_list
);
1696 for (h
= 0; h
< q
->clhash
.hashsize
; h
++) {
1697 hlist_for_each_entry_safe(cl
, next
, &q
->clhash
.hash
[h
],
1699 cbq_destroy_class(sch
, cl
);
1701 qdisc_class_hash_destroy(&q
->clhash
);
1704 static void cbq_put(struct Qdisc
*sch
, unsigned long arg
)
1706 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1708 if (--cl
->refcnt
== 0) {
1709 #ifdef CONFIG_NET_CLS_ACT
1710 spinlock_t
*root_lock
= qdisc_root_sleeping_lock(sch
);
1711 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1713 spin_lock_bh(root_lock
);
1714 if (q
->rx_class
== cl
)
1716 spin_unlock_bh(root_lock
);
1719 cbq_destroy_class(sch
, cl
);
1724 cbq_change_class(struct Qdisc
*sch
, u32 classid
, u32 parentid
, struct nlattr
**tca
,
1728 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1729 struct cbq_class
*cl
= (struct cbq_class
*)*arg
;
1730 struct nlattr
*opt
= tca
[TCA_OPTIONS
];
1731 struct nlattr
*tb
[TCA_CBQ_MAX
+ 1];
1732 struct cbq_class
*parent
;
1733 struct qdisc_rate_table
*rtab
= NULL
;
1738 err
= nla_parse_nested(tb
, TCA_CBQ_MAX
, opt
, cbq_policy
);
1746 cl
->tparent
->common
.classid
!= parentid
)
1748 if (!cl
->tparent
&& parentid
!= TC_H_ROOT
)
1752 if (tb
[TCA_CBQ_RATE
]) {
1753 rtab
= qdisc_get_rtab(nla_data(tb
[TCA_CBQ_RATE
]),
1759 if (tca
[TCA_RATE
]) {
1760 err
= gen_replace_estimator(&cl
->bstats
, &cl
->rate_est
,
1761 qdisc_root_sleeping_lock(sch
),
1764 qdisc_put_rtab(rtab
);
1769 /* Change class parameters */
1772 if (cl
->next_alive
!= NULL
)
1773 cbq_deactivate_class(cl
);
1776 qdisc_put_rtab(cl
->R_tab
);
1780 if (tb
[TCA_CBQ_LSSOPT
])
1781 cbq_set_lss(cl
, nla_data(tb
[TCA_CBQ_LSSOPT
]));
1783 if (tb
[TCA_CBQ_WRROPT
]) {
1785 cbq_set_wrr(cl
, nla_data(tb
[TCA_CBQ_WRROPT
]));
1788 if (tb
[TCA_CBQ_OVL_STRATEGY
])
1789 cbq_set_overlimit(cl
, nla_data(tb
[TCA_CBQ_OVL_STRATEGY
]));
1791 #ifdef CONFIG_NET_CLS_ACT
1792 if (tb
[TCA_CBQ_POLICE
])
1793 cbq_set_police(cl
, nla_data(tb
[TCA_CBQ_POLICE
]));
1796 if (tb
[TCA_CBQ_FOPT
])
1797 cbq_set_fopt(cl
, nla_data(tb
[TCA_CBQ_FOPT
]));
1800 cbq_activate_class(cl
);
1802 sch_tree_unlock(sch
);
1807 if (parentid
== TC_H_ROOT
)
1810 if (tb
[TCA_CBQ_WRROPT
] == NULL
|| tb
[TCA_CBQ_RATE
] == NULL
||
1811 tb
[TCA_CBQ_LSSOPT
] == NULL
)
1814 rtab
= qdisc_get_rtab(nla_data(tb
[TCA_CBQ_RATE
]), tb
[TCA_CBQ_RTAB
]);
1820 if (TC_H_MAJ(classid
^ sch
->handle
) ||
1821 cbq_class_lookup(q
, classid
))
1825 classid
= TC_H_MAKE(sch
->handle
, 0x8000);
1827 for (i
= 0; i
< 0x8000; i
++) {
1828 if (++q
->hgenerator
>= 0x8000)
1830 if (cbq_class_lookup(q
, classid
|q
->hgenerator
) == NULL
)
1836 classid
= classid
|q
->hgenerator
;
1841 parent
= cbq_class_lookup(q
, parentid
);
1848 cl
= kzalloc(sizeof(*cl
), GFP_KERNEL
);
1852 if (tca
[TCA_RATE
]) {
1853 err
= gen_new_estimator(&cl
->bstats
, &cl
->rate_est
,
1854 qdisc_root_sleeping_lock(sch
),
1865 cl
->q
= qdisc_create_dflt(sch
->dev_queue
, &pfifo_qdisc_ops
, classid
);
1867 cl
->q
= &noop_qdisc
;
1868 cl
->common
.classid
= classid
;
1869 cl
->tparent
= parent
;
1871 cl
->allot
= parent
->allot
;
1872 cl
->quantum
= cl
->allot
;
1873 cl
->weight
= cl
->R_tab
->rate
.rate
;
1877 cl
->borrow
= cl
->tparent
;
1878 if (cl
->tparent
!= &q
->link
)
1879 cl
->share
= cl
->tparent
;
1880 cbq_adjust_levels(parent
);
1881 cl
->minidle
= -0x7FFFFFFF;
1882 cbq_set_lss(cl
, nla_data(tb
[TCA_CBQ_LSSOPT
]));
1883 cbq_set_wrr(cl
, nla_data(tb
[TCA_CBQ_WRROPT
]));
1884 if (cl
->ewma_log
== 0)
1885 cl
->ewma_log
= q
->link
.ewma_log
;
1886 if (cl
->maxidle
== 0)
1887 cl
->maxidle
= q
->link
.maxidle
;
1889 cl
->avpkt
= q
->link
.avpkt
;
1890 cl
->overlimit
= cbq_ovl_classic
;
1891 if (tb
[TCA_CBQ_OVL_STRATEGY
])
1892 cbq_set_overlimit(cl
, nla_data(tb
[TCA_CBQ_OVL_STRATEGY
]));
1893 #ifdef CONFIG_NET_CLS_ACT
1894 if (tb
[TCA_CBQ_POLICE
])
1895 cbq_set_police(cl
, nla_data(tb
[TCA_CBQ_POLICE
]));
1897 if (tb
[TCA_CBQ_FOPT
])
1898 cbq_set_fopt(cl
, nla_data(tb
[TCA_CBQ_FOPT
]));
1899 sch_tree_unlock(sch
);
1901 qdisc_class_hash_grow(sch
, &q
->clhash
);
1903 *arg
= (unsigned long)cl
;
1907 qdisc_put_rtab(rtab
);
1911 static int cbq_delete(struct Qdisc
*sch
, unsigned long arg
)
1913 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1914 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1917 if (cl
->filters
|| cl
->children
|| cl
== &q
->link
)
1922 qlen
= cl
->q
->q
.qlen
;
1924 qdisc_tree_decrease_qlen(cl
->q
, qlen
);
1927 cbq_deactivate_class(cl
);
1929 if (q
->tx_borrowed
== cl
)
1930 q
->tx_borrowed
= q
->tx_class
;
1931 if (q
->tx_class
== cl
) {
1933 q
->tx_borrowed
= NULL
;
1935 #ifdef CONFIG_NET_CLS_ACT
1936 if (q
->rx_class
== cl
)
1940 cbq_unlink_class(cl
);
1941 cbq_adjust_levels(cl
->tparent
);
1943 cbq_sync_defmap(cl
);
1946 sch_tree_unlock(sch
);
1948 BUG_ON(--cl
->refcnt
== 0);
1950 * This shouldn't happen: we "hold" one cops->get() when called
1951 * from tc_ctl_tclass; the destroy method is done from cops->put().
1957 static struct tcf_proto
**cbq_find_tcf(struct Qdisc
*sch
, unsigned long arg
)
1959 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1960 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1965 return &cl
->filter_list
;
1968 static unsigned long cbq_bind_filter(struct Qdisc
*sch
, unsigned long parent
,
1971 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1972 struct cbq_class
*p
= (struct cbq_class
*)parent
;
1973 struct cbq_class
*cl
= cbq_class_lookup(q
, classid
);
1976 if (p
&& p
->level
<= cl
->level
)
1979 return (unsigned long)cl
;
1984 static void cbq_unbind_filter(struct Qdisc
*sch
, unsigned long arg
)
1986 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1991 static void cbq_walk(struct Qdisc
*sch
, struct qdisc_walker
*arg
)
1993 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1994 struct cbq_class
*cl
;
2000 for (h
= 0; h
< q
->clhash
.hashsize
; h
++) {
2001 hlist_for_each_entry(cl
, &q
->clhash
.hash
[h
], common
.hnode
) {
2002 if (arg
->count
< arg
->skip
) {
2006 if (arg
->fn(sch
, (unsigned long)cl
, arg
) < 0) {
2015 static const struct Qdisc_class_ops cbq_class_ops
= {
2018 .qlen_notify
= cbq_qlen_notify
,
2021 .change
= cbq_change_class
,
2022 .delete = cbq_delete
,
2024 .tcf_chain
= cbq_find_tcf
,
2025 .bind_tcf
= cbq_bind_filter
,
2026 .unbind_tcf
= cbq_unbind_filter
,
2027 .dump
= cbq_dump_class
,
2028 .dump_stats
= cbq_dump_class_stats
,
2031 static struct Qdisc_ops cbq_qdisc_ops __read_mostly
= {
2033 .cl_ops
= &cbq_class_ops
,
2035 .priv_size
= sizeof(struct cbq_sched_data
),
2036 .enqueue
= cbq_enqueue
,
2037 .dequeue
= cbq_dequeue
,
2038 .peek
= qdisc_peek_dequeued
,
2042 .destroy
= cbq_destroy
,
2045 .dump_stats
= cbq_dump_stats
,
2046 .owner
= THIS_MODULE
,
2049 static int __init
cbq_module_init(void)
2051 return register_qdisc(&cbq_qdisc_ops
);
2053 static void __exit
cbq_module_exit(void)
2055 unregister_qdisc(&cbq_qdisc_ops
);
2057 module_init(cbq_module_init
)
2058 module_exit(cbq_module_exit
)
2059 MODULE_LICENSE("GPL");