/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/* Class-Based Queueing (CBQ) algorithm.
   =======================================

   Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
            Management Models for Packet Networks",
            IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

            [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

            [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
            Parameters", 1996

            [4] Sally Floyd and Michael Speer, "Experimental Results
            for Class-Based Queueing", 1998, not published.

   -----------------------------------------------------------------------

   The algorithm skeleton was taken from the NS simulator cbq.cc.
   If someone wants to check this code against the LBL version,
   they should take into account that ONLY the skeleton was borrowed;
   the implementation is different. In particular:

   --- The WRR algorithm is different. Our version looks more
   reasonable (I hope) and works when quanta are allowed to be
   less than MTU, which is always the case when real time classes
   have small rates. Note that the statement in [3] is
   incomplete: delay may actually be estimated even if the class
   per-round allotment is less than MTU. Namely, if the per-round
   allotment is W*r_i, and r_1+...+r_k = r < 1, then

   delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

   In the worst case we have an IntServ estimate with D = W*r+k*MTU
   and C = MTU*r. The proof (if correct at all) is trivial.

   --- It seems that cbq-2.0 is not very accurate. At least, I cannot
   interpret some places, which look like wrong translations
   from NS. Anyone is advised to find these differences
   and explain to me why I am wrong 8).

   --- Linux has no EOI event, so we cannot estimate true class
   idle time. The workaround is to consider the next dequeue event
   as a sign that the previous packet has finished. This is wrong because of
   internal device queueing, but on a permanently loaded link it is true.
   Moreover, combined with the clock integrator, this scheme looks
   very close to an ideal solution.  */
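
/* A quick numeric check of the bound above, with illustrative values
   not taken from the original text: MTU = 1500 bytes, link bandwidth
   B = 10^7 bytes/sec, k = 2 classes, each with per-round allotment
   W*r_i = 500 bytes, hence W*r = 1000 bytes. Then

       delay_i <= (ceil(1500/500)*1000 + 1000 + 2*1500)/10^7 = 0.7 ms

   so the estimate stays finite even though the per-round allotment
   (500 bytes) is below MTU, which is the point made against [3]. */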

struct cbq_sched_data;


struct cbq_class
{
        struct Qdisc_class_common common;
        struct cbq_class        *next_alive;    /* next class with backlog in this priority band */

/* Parameters */
        unsigned char           priority;       /* class priority */
        unsigned char           priority2;      /* priority to be used after overlimit */
        unsigned char           ewma_log;       /* time constant for idle time calculation */
        unsigned char           ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
        unsigned char           police;
#endif

        u32                     defmap;

        /* Link-sharing scheduler parameters */
        long                    maxidle;        /* Class parameters: see below. */
        long                    offtime;
        long                    minidle;
        u32                     avpkt;
        struct qdisc_rate_table *R_tab;

        /* Overlimit strategy parameters */
        void                    (*overlimit)(struct cbq_class *cl);
        psched_tdiff_t          penalty;

        /* General scheduler (WRR) parameters */
        long                    allot;
        long                    quantum;        /* Allotment per WRR round */
        long                    weight;         /* Relative allotment: see below */

        struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
        struct cbq_class        *split;         /* Ptr to split node */
        struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
        struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
        struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
                                                   parent otherwise */
        struct cbq_class        *sibling;       /* Sibling chain */
        struct cbq_class        *children;      /* Pointer to children chain */

        struct Qdisc            *q;             /* Elementary queueing discipline */


/* Variables */
        unsigned char           cpriority;      /* Effective priority */
        unsigned char           delayed;
        unsigned char           level;          /* level of the class in hierarchy:
                                                   0 for leaf classes, and maximal
                                                   level of children + 1 for nodes.
                                                 */

        psched_time_t           last;           /* Last end of service */
        psched_time_t           undertime;
        long                    avgidle;
        long                    deficit;        /* Saved deficit for WRR */
        psched_time_t           penalized;
        struct gnet_stats_basic_packed bstats;
        struct gnet_stats_queue qstats;
        struct gnet_stats_rate_est rate_est;
        struct tc_cbq_xstats    xstats;

        struct tcf_proto        *filter_list;

        int                     refcnt;
        int                     filters;

        struct cbq_class        *defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
        struct Qdisc_class_hash clhash;         /* Hash table of all classes */
        int                     nclasses[TC_CBQ_MAXPRIO+1];
        unsigned                quanta[TC_CBQ_MAXPRIO+1];

        struct cbq_class        link;

        unsigned                activemask;
        struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
                                                                   with backlog */

#ifdef CONFIG_NET_CLS_ACT
        struct cbq_class        *rx_class;
#endif
        struct cbq_class        *tx_class;
        struct cbq_class        *tx_borrowed;
        int                     tx_len;
        psched_time_t           now;            /* Cached timestamp */
        psched_time_t           now_rt;         /* Cached real time */
        unsigned                pmask;

        struct hrtimer          delay_timer;
        struct qdisc_watchdog   watchdog;       /* Watchdog timer,
                                                   started when CBQ has
                                                   backlog, but cannot
                                                   transmit just now */
        psched_tdiff_t          wd_expires;
        int                     toplevel;
        u32                     hgenerator;
};


#define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
        struct Qdisc_class_common *clc;

        clc = qdisc_class_find(&q->clhash, classid);
        if (clc == NULL)
                return NULL;
        return container_of(clc, struct cbq_class, common);
}
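
/* Illustrative only (numbers are assumptions, not from the source):
   L2T ("length to time") converts a packet length into transmission
   time at the class's configured rate, via the precomputed qdisc
   rate table. E.g. at a configured rate of 125000 bytes/sec, a
   1500-byte packet converts to 12 ms worth of psched ticks; the
   avgidle and undertime arithmetic below is done in these units. */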

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
        struct cbq_class *cl, *new;

        for (cl = this->tparent; cl; cl = cl->tparent)
                if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
                        return new;

        return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (f.e. route based) at the root
   of CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *head = &q->link;
        struct cbq_class **defmap;
        struct cbq_class *cl = NULL;
        u32 prio = skb->priority;
        struct tcf_result res;

        /*
         *  Step 1. If skb->priority points to one of our classes, use it.
         */
        if (TC_H_MAJ(prio^sch->handle) == 0 &&
            (cl = cbq_class_lookup(q, prio)) != NULL)
                return cl;

        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        for (;;) {
                int result = 0;
                defmap = head->defaults;

                /*
                 * Step 2+n. Apply classifier.
                 */
                if (!head->filter_list ||
                    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
                        goto fallback;

                if ((cl = (void*)res.class) == NULL) {
                        if (TC_H_MAJ(res.classid))
                                cl = cbq_class_lookup(q, res.classid);
                        else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
                                cl = defmap[TC_PRIO_BESTEFFORT];

                        if (cl == NULL || cl->level >= head->level)
                                goto fallback;
                }

#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
                case TC_ACT_STOLEN:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                case TC_ACT_SHOT:
                        return NULL;
                case TC_ACT_RECLASSIFY:
                        return cbq_reclassify(skb, cl);
                }
#endif
                if (cl->level == 0)
                        return cl;

                /*
                 * Step 3+n. If the classifier selected a link-sharing class,
                 * apply the agency-specific classifier.
                 * Repeat this procedure until we hit a leaf node.
                 */
                head = cl;
        }

fallback:
        cl = head;

        /*
         * Step 4. No success...
         */
        if (TC_H_MAJ(prio) == 0 &&
            !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
            !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
                return head;

        return cl;
}

/*
   A packet has just been enqueued on an empty class.
   cbq_activate_class adds the class to the tail of the active class
   list of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        int prio = cl->cpriority;
        struct cbq_class *cl_tail;

        cl_tail = q->active[prio];
        q->active[prio] = cl;

        if (cl_tail != NULL) {
                cl->next_alive = cl_tail->next_alive;
                cl_tail->next_alive = cl;
        } else {
                cl->next_alive = cl;
                q->activemask |= (1<<prio);
        }
}
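
/* The active chain is a circular singly linked ring threaded through
   next_alive, and q->active[prio] always points at the TAIL: thus
   tail->next_alive is the head, and the tail insertion above is O(1).
   cbq_deactivate_class() and the dequeue loop rely on this tail
   convention when they repair the ring. */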

/*
   Unlink class from the active chain.
   Note that the same unlinking is also done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
        int prio = this->cpriority;
        struct cbq_class *cl;
        struct cbq_class *cl_prev = q->active[prio];

        do {
                cl = cl_prev->next_alive;
                if (cl == this) {
                        cl_prev->next_alive = cl->next_alive;
                        cl->next_alive = NULL;

                        if (cl == q->active[prio]) {
                                q->active[prio] = cl_prev;
                                if (cl == q->active[prio]) {
                                        q->active[prio] = NULL;
                                        q->activemask &= ~(1<<prio);
                                        return;
                                }
                        }
                        return;
                }
        } while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
        int toplevel = q->toplevel;

        if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
                psched_time_t now;
                psched_tdiff_t incr;

                now = psched_get_time();
                incr = now - q->now_rt;
                now = q->now + incr;

                do {
                        if (cl->undertime < now) {
                                q->toplevel = cl->level;
                                return;
                        }
                } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
        }
}
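
/* q->toplevel implements the top-level link-sharing cutoff: a class
   may borrow only from ancestors whose level does not exceed the
   cutoff. The function above lowers the cutoff to the first
   borrowable ancestor that is already underlimit at the extrapolated
   current time, so that the freshly arrived backlog may borrow. */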

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        int uninitialized_var(ret);
        struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
        q->rx_class = cl;
#endif
        if (cl == NULL) {
                if (ret & __NET_XMIT_BYPASS)
                        sch->qstats.drops++;
                kfree_skb(skb);
                return ret;
        }

#ifdef CONFIG_NET_CLS_ACT
        cl->q->__parent = sch;
#endif
        ret = qdisc_enqueue(skb, cl->q);
        if (ret == NET_XMIT_SUCCESS) {
                sch->q.qlen++;
                cbq_mark_toplevel(q, cl);
                if (!cl->next_alive)
                        cbq_activate_class(cl);
                return ret;
        }

        if (net_xmit_drop_count(ret)) {
                sch->qstats.drops++;
                cbq_mark_toplevel(q, cl);
                cl->qstats.drops++;
        }
        return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        psched_tdiff_t delay = cl->undertime - q->now;

        if (!cl->delayed) {
                delay += cl->offtime;

                /*
                   The class goes to sleep, so that it will have no
                   chance to work off its (negative) avgidle.
                   Let's forgive it 8)

                   BTW cbq-2.0 has a bug here; apparently they forgot
                   to shift it by cl->ewma_log.
                 */
                if (cl->avgidle < 0)
                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
                if (cl->avgidle < cl->minidle)
                        cl->avgidle = cl->minidle;
                if (delay <= 0)
                        delay = 1;
                cl->undertime = q->now + delay;

                cl->xstats.overactions++;
                cl->delayed = 1;
        }
        if (q->wd_expires == 0 || q->wd_expires > delay)
                q->wd_expires = delay;

        /* Dirty work! We must schedule wakeups based on
           the real available rate, rather than the leaf rate,
           which may be tiny (even zero).
         */
        if (q->toplevel == TC_CBQ_MAXLEVEL) {
                struct cbq_class *b;
                psched_tdiff_t base_delay = q->wd_expires;

                for (b = cl->borrow; b; b = b->borrow) {
                        delay = b->undertime - q->now;
                        if (delay < base_delay) {
                                if (delay <= 0)
                                        delay = 1;
                                base_delay = delay;
                        }
                }

                q->wd_expires = base_delay;
        }
}

/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
   when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *this = cl;

        do {
                if (cl->level > q->toplevel) {
                        cl = NULL;
                        break;
                }
        } while ((cl = cl->borrow) != NULL);

        if (cl == NULL)
                cl = this;
        cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        psched_tdiff_t delay = cl->undertime - q->now;

        if (test_bit(__QDISC_STATE_DEACTIVATED,
                     &qdisc_root_sleeping(cl->qdisc)->state))
                return;

        if (!cl->delayed) {
                psched_time_t sched = q->now;
                ktime_t expires;

                delay += cl->offtime;
                if (cl->avgidle < 0)
                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
                if (cl->avgidle < cl->minidle)
                        cl->avgidle = cl->minidle;
                cl->undertime = q->now + delay;

                if (delay > 0) {
                        sched += delay + cl->penalty;
                        cl->penalized = sched;
                        cl->cpriority = TC_CBQ_MAXPRIO;
                        q->pmask |= (1<<TC_CBQ_MAXPRIO);

                        expires = ktime_set(0, 0);
                        expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
                        if (hrtimer_try_to_cancel(&q->delay_timer) &&
                            ktime_to_ns(ktime_sub(
                                        hrtimer_get_expires(&q->delay_timer),
                                        expires)) > 0)
                                hrtimer_set_expires(&q->delay_timer, expires);
                        hrtimer_restart(&q->delay_timer);
                        cl->delayed = 1;
                        cl->xstats.overactions++;
                        return;
                }
                delay = 1;
        }
        if (q->wd_expires == 0 || q->wd_expires > delay)
                q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

        cl->penalized = q->now + cl->penalty;

        if (cl->cpriority != cl->priority2) {
                cl->cpriority = cl->priority2;
                q->pmask |= (1<<cl->cpriority);
                cl->xstats.overactions++;
        }
        cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
        if (cl->q->ops->drop)
                if (cl->q->ops->drop(cl->q))
                        cl->qdisc->q.qlen--;
        cl->xstats.overactions++;
        cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
                                       psched_time_t now)
{
        struct cbq_class *cl;
        struct cbq_class *cl_prev = q->active[prio];
        psched_time_t sched = now;

        if (cl_prev == NULL)
                return 0;

        do {
                cl = cl_prev->next_alive;
                if (now - cl->penalized > 0) {
                        cl_prev->next_alive = cl->next_alive;
                        cl->next_alive = NULL;
                        cl->cpriority = cl->priority;
                        cl->delayed = 0;
                        cbq_activate_class(cl);

                        if (cl == q->active[prio]) {
                                q->active[prio] = cl_prev;
                                if (cl == q->active[prio]) {
                                        q->active[prio] = NULL;
                                        return 0;
                                }
                        }

                        cl = cl_prev->next_alive;
                } else if (sched - cl->penalized > 0)
                        sched = cl->penalized;
        } while ((cl_prev = cl) != q->active[prio]);

        return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
        struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
                                                delay_timer);
        struct Qdisc *sch = q->watchdog.qdisc;
        psched_time_t now;
        psched_tdiff_t delay = 0;
        unsigned pmask;

        now = psched_get_time();

        pmask = q->pmask;
        q->pmask = 0;

        while (pmask) {
                int prio = ffz(~pmask);
                psched_tdiff_t tmp;

                pmask &= ~(1<<prio);

                tmp = cbq_undelay_prio(q, prio, now);
                if (tmp > 0) {
                        q->pmask |= 1<<prio;
                        if (tmp < delay || delay == 0)
                                delay = tmp;
                }
        }

        if (delay) {
                ktime_t time;

                time = ktime_set(0, 0);
                time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
                hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
        }

        sch->flags &= ~TCQ_F_THROTTLED;
        __netif_schedule(qdisc_root(sch));
        return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
        struct Qdisc *sch = child->__parent;
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = q->rx_class;

        q->rx_class = NULL;

        if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
                int ret;

                cbq_mark_toplevel(q, cl);

                q->rx_class = cl;
                cl->q->__parent = sch;

                ret = qdisc_enqueue(skb, cl->q);
                if (ret == NET_XMIT_SUCCESS) {
                        sch->q.qlen++;
                        if (!cl->next_alive)
                                cbq_activate_class(cl);
                        return 0;
                }
                if (net_xmit_drop_count(ret))
                        sch->qstats.drops++;
                return 0;
        }

        sch->qstats.drops++;
        return -1;
}
#endif

/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
*/

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                    struct cbq_class *borrowed)
{
        if (cl && q->toplevel >= borrowed->level) {
                if (cl->q->q.qlen > 1) {
                        do {
                                if (borrowed->undertime == PSCHED_PASTPERFECT) {
                                        q->toplevel = borrowed->level;
                                        return;
                                }
                        } while ((borrowed=borrowed->borrow) != NULL);
                }
#if 0
        /* It is not necessary now. Uncommenting it
           will save CPU cycles, but decrease fairness.
         */
                q->toplevel = TC_CBQ_MAXLEVEL;
#endif
        }
}

static void
cbq_update(struct cbq_sched_data *q)
{
        struct cbq_class *this = q->tx_class;
        struct cbq_class *cl = this;
        int len = q->tx_len;

        q->tx_class = NULL;

        for ( ; cl; cl = cl->share) {
                long avgidle = cl->avgidle;
                long idle;

                cl->bstats.packets++;
                cl->bstats.bytes += len;

                /*
                   (now - last) is the total time between packet right edges.
                   (last_pktlen/rate) is the "virtual" busy time, so that

                         idle = (now - last) - last_pktlen/rate
                 */

                idle = q->now - cl->last;
                if ((unsigned long)idle > 128*1024*1024) {
                        avgidle = cl->maxidle;
                } else {
                        idle -= L2T(cl, len);

                /* true_avgidle := (1-W)*true_avgidle + W*idle,
                   where W=2^{-ewma_log}. But cl->avgidle is scaled:
                   cl->avgidle == true_avgidle/W,
                   hence:
                 */
                        avgidle += idle - (avgidle>>cl->ewma_log);
                }

                if (avgidle <= 0) {
                        /* Overlimit or at-limit */

                        if (avgidle < cl->minidle)
                                avgidle = cl->minidle;

                        cl->avgidle = avgidle;

                        /* Calculate the expected time when this class
                           will be allowed to send.
                           It will occur when:
                           (1-W)*true_avgidle + W*delay = 0, i.e.
                           idle = (1/W - 1)*(-true_avgidle)
                           or
                           idle = (1 - W)*(-cl->avgidle);
                         */
                        idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

                        /*
                           That is not all.
                           To maintain the rate allocated to the class,
                           we also add to undertime the virtual clock time
                           necessary to complete the transmitted packet.
                           (len/phys_bandwidth has already elapsed by the
                           moment cbq_update is called.)
                         */

                        idle -= L2T(&q->link, len);
                        idle += L2T(cl, len);

                        cl->undertime = q->now + idle;
                } else {
                        /* Underlimit */

                        cl->undertime = PSCHED_PASTPERFECT;
                        if (avgidle > cl->maxidle)
                                cl->avgidle = cl->maxidle;
                        else
                                cl->avgidle = avgidle;
                }
                cl->last = q->now;
        }

        cbq_update_toplevel(q, this, q->tx_borrowed);
}
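
/* Worked example of the scaled EWMA above (illustrative numbers):
   with ewma_log = 5, W = 2^-5, cl->avgidle stores true_avgidle/W,
   i.e. true_avgidle*32. The update

       avgidle += idle - (avgidle >> 5);

   then matches true_avgidle := (1-W)*true_avgidle + W*idle exactly:
   avgidle = 3200 (true value 100) and idle = 228 give
   3200 + 228 - 100 = 3328, i.e. a true value of
   104 = (31/32)*100 + 228/32. */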

static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *this_cl = cl;

        if (cl->tparent == NULL)
                return cl;

        if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
                cl->delayed = 0;
                return cl;
        }

        do {
                /* This is a very suspicious place. Currently an overlimit
                   action is generated for non-bounded classes only if the
                   link is completely congested. Though this agrees with
                   the ancestor-only paradigm, it looks very stupid.
                   In particular, it means that this chunk of code will
                   either never be called or will result in strong
                   amplification of burstiness. Dangerous, silly, and yet
                   no other solution exists.
                 */
                if ((cl = cl->borrow) == NULL) {
                        this_cl->qstats.overlimits++;
                        this_cl->overlimit(this_cl);
                        return NULL;
                }
                if (cl->level > q->toplevel)
                        return NULL;
        } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

        cl->delayed = 0;
        return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl_tail, *cl_prev, *cl;
        struct sk_buff *skb;
        int deficit;

        cl_tail = cl_prev = q->active[prio];
        cl = cl_prev->next_alive;

        do {
                deficit = 0;

                /* Start round */
                do {
                        struct cbq_class *borrow = cl;

                        if (cl->q->q.qlen &&
                            (borrow = cbq_under_limit(cl)) == NULL)
                                goto skip_class;

                        if (cl->deficit <= 0) {
                                /* Class exhausted its allotment per
                                   this round. Switch to the next one.
                                 */
                                deficit = 1;
                                cl->deficit += cl->quantum;
                                goto next_class;
                        }

                        skb = cl->q->dequeue(cl->q);

                        /* Class did not give us any skb :-(
                           It could occur even if cl->q->q.qlen != 0
                           f.e. if cl->q == "tbf"
                         */
                        if (skb == NULL)
                                goto skip_class;

                        cl->deficit -= qdisc_pkt_len(skb);
                        q->tx_class = cl;
                        q->tx_borrowed = borrow;
                        if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
                                borrow->xstats.borrows++;
                                cl->xstats.borrows++;
#else
                                borrow->xstats.borrows += qdisc_pkt_len(skb);
                                cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
                        }
                        q->tx_len = qdisc_pkt_len(skb);

                        if (cl->deficit <= 0) {
                                q->active[prio] = cl;
                                cl = cl->next_alive;
                                cl->deficit += cl->quantum;
                        }
                        return skb;

skip_class:
                        if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
                                /* Class is empty or penalized.
                                   Unlink it from active chain.
                                 */
                                cl_prev->next_alive = cl->next_alive;
                                cl->next_alive = NULL;

                                /* Did cl_tail point to it? */
                                if (cl == cl_tail) {
                                        /* Repair it! */
                                        cl_tail = cl_prev;

                                        /* Was it the last class in this band? */
                                        if (cl == cl_tail) {
                                                /* Kill the band! */
                                                q->active[prio] = NULL;
                                                q->activemask &= ~(1<<prio);
                                                if (cl->q->q.qlen)
                                                        cbq_activate_class(cl);
                                                return NULL;
                                        }

                                        q->active[prio] = cl_tail;
                                }
                                if (cl->q->q.qlen)
                                        cbq_activate_class(cl);

                                cl = cl_prev;
                        }

next_class:
                        cl_prev = cl;
                        cl = cl->next_alive;
                } while (cl_prev != cl_tail);
        } while (deficit);

        q->active[prio] = cl_prev;

        return NULL;
}
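
/* The loop above is a deficit round robin: a class may transmit while
   its deficit is positive and receives cl->quantum more at each round
   boundary. Because the check precedes the dequeue, one packet larger
   than the quantum is still sent whenever the deficit is positive;
   the overshoot is carried as a negative deficit that later rounds
   repay. This is what makes quanta smaller than MTU workable (see the
   header comment). */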

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        unsigned activemask;

        activemask = q->activemask&0xFF;
        while (activemask) {
                int prio = ffz(~activemask);
                activemask &= ~(1<<prio);
                skb = cbq_dequeue_prio(sch, prio);
                if (skb)
                        return skb;
        }
        return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct cbq_sched_data *q = qdisc_priv(sch);
        psched_time_t now;
        psched_tdiff_t incr;

        now = psched_get_time();
        incr = now - q->now_rt;

        if (q->tx_class) {
                psched_tdiff_t incr2;
                /* Time integrator. We calculate EOS time
                   by adding the expected packet transmission time.
                   If real time is greater, we warp the artificial
                   clock, so that:

                   cbq_time = max(real_time, work);
                 */
                incr2 = L2T(&q->link, q->tx_len);
                q->now += incr2;
                cbq_update(q);
                if ((incr -= incr2) < 0)
                        incr = 0;
        }
        q->now += incr;
        q->now_rt = now;

        for (;;) {
                q->wd_expires = 0;

                skb = cbq_dequeue_1(sch);
                if (skb) {
                        qdisc_bstats_update(sch, skb);
                        sch->q.qlen--;
                        sch->flags &= ~TCQ_F_THROTTLED;
                        return skb;
                }

                /* All the classes are overlimit.

                   This is possible if:

                   1. The scheduler is empty.
                   2. The toplevel cutoff inhibited borrowing.
                   3. The root class is overlimit.

                   Reset conditions 2 and 3 and retry.

                   Note that NS and cbq-2.0 are buggy here: picking
                   an arbitrary class is appropriate for ancestor-only
                   sharing, but not for the toplevel algorithm.

                   Our version is better but slower, because it requires
                   two passes; this is unavoidable with toplevel sharing.
                 */

                if (q->toplevel == TC_CBQ_MAXLEVEL &&
                    q->link.undertime == PSCHED_PASTPERFECT)
                        break;

                q->toplevel = TC_CBQ_MAXLEVEL;
                q->link.undertime = PSCHED_PASTPERFECT;
        }

        /* No packets in the scheduler, or nobody wants to give them
           to us :-( Sigh... start the watchdog timer in the last case. */

        if (sch->q.qlen) {
                sch->qstats.overlimits++;
                if (q->wd_expires)
                        qdisc_watchdog_schedule(&q->watchdog,
                                                now + q->wd_expires);
        }
        return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
        if (this == NULL)
                return;

        do {
                int level = 0;
                struct cbq_class *cl;

                if ((cl = this->children) != NULL) {
                        do {
                                if (cl->level > level)
                                        level = cl->level;
                        } while ((cl = cl->sibling) != this->children);
                }
                this->level = level+1;
        } while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
        struct cbq_class *cl;
        struct hlist_node *n;
        unsigned int h;

        if (q->quanta[prio] == 0)
                return;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                        /* BUGGGG... Beware! This expression suffers from
                           arithmetic overflows!
                         */
                        if (cl->priority == prio) {
                                cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
                                        q->quanta[prio];
                        }
                        if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
                                printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
                                cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
                        }
                }
        }
}
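
/* Illustrative check of the normalization above (assumed numbers):
   two classes at this priority with weights 1 and 3, so
   q->quanta[prio] = 4 and q->nclasses[prio] = 2, and allot = 1000,
   yield quanta of 1*1000*2/4 = 500 and 3*1000*2/4 = 1500; each class
   gets its allot scaled by weight relative to the mean weight. The
   warning above applies because weight*allot*nclasses is formed in
   plain long arithmetic. */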

static void cbq_sync_defmap(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *split = cl->split;
        unsigned h;
        int i;

        if (split == NULL)
                return;

        for (i=0; i<=TC_PRIO_MAX; i++) {
                if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
                        split->defaults[i] = NULL;
        }

        for (i=0; i<=TC_PRIO_MAX; i++) {
                int level = split->level;

                if (split->defaults[i])
                        continue;

                for (h = 0; h < q->clhash.hashsize; h++) {
                        struct hlist_node *n;
                        struct cbq_class *c;

                        hlist_for_each_entry(c, n, &q->clhash.hash[h],
                                             common.hnode) {
                                if (c->split == split && c->level < level &&
                                    c->defmap&(1<<i)) {
                                        split->defaults[i] = c;
                                        level = c->level;
                                }
                        }
                }
        }
}
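
/* cl->defmap is a bitmask over the logical priorities 0..TC_PRIO_MAX:
   bit p set means cl volunteers as the default class for priority p
   at its split node. cbq_sync_defmap() above rebuilds
   split->defaults[] by scanning all classes attached to the split and
   choosing, for each priority, the volunteering class with the lowest
   level. */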

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
        struct cbq_class *split = NULL;

        if (splitid == 0) {
                if ((split = cl->split) == NULL)
                        return;
                splitid = split->common.classid;
        }

        if (split == NULL || split->common.classid != splitid) {
                for (split = cl->tparent; split; split = split->tparent)
                        if (split->common.classid == splitid)
                                break;
        }

        if (split == NULL)
                return;

        if (cl->split != split) {
                cl->defmap = 0;
                cbq_sync_defmap(cl);
                cl->split = split;
                cl->defmap = def&mask;
        } else
                cl->defmap = (cl->defmap&~mask)|(def&mask);

        cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
        struct cbq_class *cl, **clp;
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);

        qdisc_class_hash_remove(&q->clhash, &this->common);

        if (this->tparent) {
                clp=&this->sibling;
                cl = *clp;
                do {
                        if (cl == this) {
                                *clp = cl->sibling;
                                break;
                        }
                        clp = &cl->sibling;
                } while ((cl = *clp) != this->sibling);

                if (this->tparent->children == this) {
                        this->tparent->children = this->sibling;
                        if (this->sibling == this)
                                this->tparent->children = NULL;
                }
        } else {
                WARN_ON(this->sibling != this);
        }
}

static void cbq_link_class(struct cbq_class *this)
{
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
        struct cbq_class *parent = this->tparent;

        this->sibling = this;
        qdisc_class_hash_insert(&q->clhash, &this->common);

        if (parent == NULL)
                return;

        if (parent->children == NULL) {
                parent->children = this;
        } else {
                this->sibling = parent->children->sibling;
                parent->children->sibling = this;
        }
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl, *cl_head;
        int prio;
        unsigned int len;

        for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
                if ((cl_head = q->active[prio]) == NULL)
                        continue;

                cl = cl_head;
                do {
                        if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
                                sch->q.qlen--;
                                if (!cl->q->q.qlen)
                                        cbq_deactivate_class(cl);
                                return len;
                        }
                } while ((cl = cl->next_alive) != cl_head);
        }
        return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        struct hlist_node *n;
        int prio;
        unsigned h;

        q->activemask = 0;
        q->pmask = 0;
        q->tx_class = NULL;
        q->tx_borrowed = NULL;
        qdisc_watchdog_cancel(&q->watchdog);
        hrtimer_cancel(&q->delay_timer);
        q->toplevel = TC_CBQ_MAXLEVEL;
        q->now = psched_get_time();
        q->now_rt = q->now;

        for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
                q->active[prio] = NULL;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                        qdisc_reset(cl->q);

                        cl->next_alive = NULL;
                        cl->undertime = PSCHED_PASTPERFECT;
                        cl->avgidle = cl->maxidle;
                        cl->deficit = cl->quantum;
                        cl->cpriority = cl->priority;
                }
        }
        sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
        if (lss->change&TCF_CBQ_LSS_FLAGS) {
                cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
                cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
        }
        if (lss->change&TCF_CBQ_LSS_EWMA)
                cl->ewma_log = lss->ewma_log;
        if (lss->change&TCF_CBQ_LSS_AVPKT)
                cl->avpkt = lss->avpkt;
        if (lss->change&TCF_CBQ_LSS_MINIDLE)
                cl->minidle = -(long)lss->minidle;
        if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
                cl->maxidle = lss->maxidle;
                cl->avgidle = lss->maxidle;
        }
        if (lss->change&TCF_CBQ_LSS_OFFTIME)
                cl->offtime = lss->offtime;
        return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
        q->nclasses[cl->priority]--;
        q->quanta[cl->priority] -= cl->weight;
        cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
        q->nclasses[cl->priority]++;
        q->quanta[cl->priority] += cl->weight;
        cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

        if (wrr->allot)
                cl->allot = wrr->allot;
        if (wrr->weight)
                cl->weight = wrr->weight;
        if (wrr->priority) {
                cl->priority = wrr->priority-1;
                cl->cpriority = cl->priority;
                if (cl->priority >= cl->priority2)
                        cl->priority2 = TC_CBQ_MAXPRIO-1;
        }

        cbq_addprio(q, cl);
        return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
        switch (ovl->strategy) {
        case TC_CBQ_OVL_CLASSIC:
                cl->overlimit = cbq_ovl_classic;
                break;
        case TC_CBQ_OVL_DELAY:
                cl->overlimit = cbq_ovl_delay;
                break;
        case TC_CBQ_OVL_LOWPRIO:
                if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
                    ovl->priority2-1 <= cl->priority)
                        return -EINVAL;
                cl->priority2 = ovl->priority2-1;
                cl->overlimit = cbq_ovl_lowprio;
                break;
        case TC_CBQ_OVL_DROP:
                cl->overlimit = cbq_ovl_drop;
                break;
        case TC_CBQ_OVL_RCLASSIC:
                cl->overlimit = cbq_ovl_rclassic;
                break;
        default:
                return -EINVAL;
        }
        cl->penalty = ovl->penalty;
        return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
        cl->police = p->police;

        if (cl->q->handle) {
                if (p->police == TC_POLICE_RECLASSIFY)
                        cl->q->reshape_fail = cbq_reshape_fail;
                else
                        cl->q->reshape_fail = NULL;
        }
        return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
        cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
        return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
        [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
        [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
        [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
        [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
        [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
        [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CBQ_MAX + 1];
        struct tc_ratespec *r;
        int err;

        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
        if (err < 0)
                return err;

        if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
                return -EINVAL;

        r = nla_data(tb[TCA_CBQ_RATE]);

        if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
                return -EINVAL;

        err = qdisc_class_hash_init(&q->clhash);
        if (err < 0)
                goto put_rtab;

        q->link.refcnt = 1;
        q->link.sibling = &q->link;
        q->link.common.classid = sch->handle;
        q->link.qdisc = sch;
        q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
                                      sch->handle);
        if (!q->link.q)
                q->link.q = &noop_qdisc;

        q->link.priority = TC_CBQ_MAXPRIO-1;
        q->link.priority2 = TC_CBQ_MAXPRIO-1;
        q->link.cpriority = TC_CBQ_MAXPRIO-1;
        q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
        q->link.overlimit = cbq_ovl_classic;
        q->link.allot = psched_mtu(qdisc_dev(sch));
        q->link.quantum = q->link.allot;
        q->link.weight = q->link.R_tab->rate.rate;

        q->link.ewma_log = TC_CBQ_DEF_EWMA;
        q->link.avpkt = q->link.allot/2;
        q->link.minidle = -0x7FFFFFFF;

        qdisc_watchdog_init(&q->watchdog, sch);
        hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        q->delay_timer.function = cbq_undelay;
        q->toplevel = TC_CBQ_MAXLEVEL;
        q->now = psched_get_time();
        q->now_rt = q->now;

        cbq_link_class(&q->link);

        if (tb[TCA_CBQ_LSSOPT])
                cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

        cbq_addprio(q, &q->link);
        return 0;

put_rtab:
        qdisc_put_rtab(q->link.R_tab);
        return err;
}

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);

        NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_lssopt opt;

        opt.flags = 0;
        if (cl->borrow == NULL)
                opt.flags |= TCF_CBQ_LSS_BOUNDED;
        if (cl->share == NULL)
                opt.flags |= TCF_CBQ_LSS_ISOLATED;
        opt.ewma_log = cl->ewma_log;
        opt.level = cl->level;
        opt.avpkt = cl->avpkt;
        opt.maxidle = cl->maxidle;
        opt.minidle = (u32)(-cl->minidle);
        opt.offtime = cl->offtime;
        opt.change = ~0;
        NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_wrropt opt;

        opt.flags = 0;
        opt.allot = cl->allot;
        opt.priority = cl->priority+1;
        opt.cpriority = cl->cpriority+1;
        opt.weight = cl->weight;
        NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_ovl opt;

        opt.strategy = cl->ovl_strategy;
        opt.priority2 = cl->priority2+1;
        opt.pad = 0;
        opt.penalty = cl->penalty;
        NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_fopt opt;

        if (cl->split || cl->defmap) {
                opt.split = cl->split ? cl->split->common.classid : 0;
                opt.defmap = cl->defmap;
                opt.defchange = ~0;
                NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
        }
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_police opt;

        if (cl->police) {
                opt.police = cl->police;
                opt.__res1 = 0;
                opt.__res2 = 0;
                NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
        }
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
        if (cbq_dump_lss(skb, cl) < 0 ||
            cbq_dump_rate(skb, cl) < 0 ||
            cbq_dump_wrr(skb, cl) < 0 ||
            cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
            cbq_dump_police(skb, cl) < 0 ||
#endif
            cbq_dump_fopt(skb, cl) < 0)
                return -1;
        return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct nlattr *nest;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
        if (cbq_dump_attr(skb, &q->link) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest);
        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct cbq_sched_data *q = qdisc_priv(sch);

        q->link.xstats.avgidle = q->link.avgidle;
        return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
               struct sk_buff *skb, struct tcmsg *tcm)
{
        struct cbq_class *cl = (struct cbq_class*)arg;
        struct nlattr *nest;

        if (cl->tparent)
                tcm->tcm_parent = cl->tparent->common.classid;
        else
                tcm->tcm_parent = TC_H_ROOT;
        tcm->tcm_handle = cl->common.classid;
        tcm->tcm_info = cl->q->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
        if (cbq_dump_attr(skb, cl) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest);
        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
                     struct gnet_dump *d)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class*)arg;

        cl->qstats.qlen = cl->q->q.qlen;
        cl->xstats.avgidle = cl->avgidle;
        cl->xstats.undertime = 0;

        if (cl->undertime != PSCHED_PASTPERFECT)
                cl->xstats.undertime = cl->undertime - q->now;

        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
            gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
            gnet_stats_copy_queue(d, &cl->qstats) < 0)
                return -1;

        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        if (new == NULL) {
                new = qdisc_create_dflt(sch->dev_queue,
                                        &pfifo_qdisc_ops, cl->common.classid);
                if (new == NULL)
                        return -ENOBUFS;
        } else {
#ifdef CONFIG_NET_CLS_ACT
                if (cl->police == TC_POLICE_RECLASSIFY)
                        new->reshape_fail = cbq_reshape_fail;
#endif
        }
        sch_tree_lock(sch);
        *old = cl->q;
        cl->q = new;
        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
        qdisc_reset(*old);
        sch_tree_unlock(sch);

        return 0;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        return cl->q;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class *)arg;

        if (cl->q->q.qlen == 0)
                cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = cbq_class_lookup(q, classid);

        if (cl) {
                cl->refcnt++;
                return (unsigned long)cl;
        }
        return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(sch);

        WARN_ON(cl->filters);

        tcf_destroy_chain(&cl->filter_list);
        qdisc_destroy(cl->q);
        qdisc_put_rtab(cl->R_tab);
        gen_kill_estimator(&cl->bstats, &cl->rate_est);
        if (cl != &q->link)
                kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct hlist_node *n, *next;
        struct cbq_class *cl;
        unsigned h;

#ifdef CONFIG_NET_CLS_ACT
        q->rx_class = NULL;
#endif
        /*
         * Filters must be destroyed first because we don't destroy the
         * classes from root to leafs which means that filters can still
         * be bound to classes which have been destroyed already. --TGR '04
         */
        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
                        tcf_destroy_chain(&cl->filter_list);
        }
        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
                                          common.hnode)
                        cbq_destroy_class(sch, cl);
        }
        qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
                spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
                struct cbq_sched_data *q = qdisc_priv(sch);

                spin_lock_bh(root_lock);
                if (q->rx_class == cl)
                        q->rx_class = NULL;
                spin_unlock_bh(root_lock);
#endif

                cbq_destroy_class(sch, cl);
        }
}

static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
                 unsigned long *arg)
{
        int err;
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class*)*arg;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_CBQ_MAX + 1];
        struct cbq_class *parent;
        struct qdisc_rate_table *rtab = NULL;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
        if (err < 0)
                return err;

        if (cl) {
                /* Check parent */
                if (parentid) {
                        if (cl->tparent &&
                            cl->tparent->common.classid != parentid)
                                return -EINVAL;
                        if (!cl->tparent && parentid != TC_H_ROOT)
                                return -EINVAL;
                }

                if (tb[TCA_CBQ_RATE]) {
                        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
                                              tb[TCA_CBQ_RTAB]);
                        if (rtab == NULL)
                                return -EINVAL;
                }

                if (tca[TCA_RATE]) {
                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
                                                    qdisc_root_sleeping_lock(sch),
                                                    tca[TCA_RATE]);
                        if (err) {
                                if (rtab)
                                        qdisc_put_rtab(rtab);
                                return err;
                        }
                }

                /* Change class parameters */
                sch_tree_lock(sch);

                if (cl->next_alive != NULL)
                        cbq_deactivate_class(cl);

                if (rtab) {
                        qdisc_put_rtab(cl->R_tab);
                        cl->R_tab = rtab;
                }

                if (tb[TCA_CBQ_LSSOPT])
                        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

                if (tb[TCA_CBQ_WRROPT]) {
                        cbq_rmprio(q, cl);
                        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
                }

                if (tb[TCA_CBQ_OVL_STRATEGY])
                        cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
                if (tb[TCA_CBQ_POLICE])
                        cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

                if (tb[TCA_CBQ_FOPT])
                        cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

                if (cl->q->q.qlen)
                        cbq_activate_class(cl);

                sch_tree_unlock(sch);

                return 0;
        }

        if (parentid == TC_H_ROOT)
                return -EINVAL;

        if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
            tb[TCA_CBQ_LSSOPT] == NULL)
                return -EINVAL;

        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
        if (rtab == NULL)
                return -EINVAL;

        if (classid) {
                err = -EINVAL;
                if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
                        goto failure;
        } else {
                int i;
                classid = TC_H_MAKE(sch->handle,0x8000);

                for (i=0; i<0x8000; i++) {
                        if (++q->hgenerator >= 0x8000)
                                q->hgenerator = 1;
                        if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
                                break;
                }
                err = -ENOSR;
                if (i >= 0x8000)
                        goto failure;
                classid = classid|q->hgenerator;
        }

        parent = &q->link;
        if (parentid) {
                parent = cbq_class_lookup(q, parentid);
                err = -EINVAL;
                if (parent == NULL)
                        goto failure;
        }

        err = -ENOBUFS;
        cl = kzalloc(sizeof(*cl), GFP_KERNEL);
        if (cl == NULL)
                goto failure;

        if (tca[TCA_RATE]) {
                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
                                        qdisc_root_sleeping_lock(sch),
                                        tca[TCA_RATE]);
                if (err) {
                        kfree(cl);
                        goto failure;
                }
        }

        cl->R_tab = rtab;
        rtab = NULL;
        cl->refcnt = 1;
        cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
        if (!cl->q)
                cl->q = &noop_qdisc;
        cl->common.classid = classid;
        cl->tparent = parent;
        cl->qdisc = sch;
        cl->allot = parent->allot;
        cl->quantum = cl->allot;
        cl->weight = cl->R_tab->rate.rate;

        sch_tree_lock(sch);
        cbq_link_class(cl);
        cl->borrow = cl->tparent;
        if (cl->tparent != &q->link)
                cl->share = cl->tparent;
        cbq_adjust_levels(parent);
        cl->minidle = -0x7FFFFFFF;
        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
        if (cl->ewma_log==0)
                cl->ewma_log = q->link.ewma_log;
        if (cl->maxidle==0)
                cl->maxidle = q->link.maxidle;
        if (cl->avpkt==0)
                cl->avpkt = q->link.avpkt;
        cl->overlimit = cbq_ovl_classic;
        if (tb[TCA_CBQ_OVL_STRATEGY])
                cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
        if (tb[TCA_CBQ_POLICE])
                cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
        if (tb[TCA_CBQ_FOPT])
                cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
        sch_tree_unlock(sch);

        qdisc_class_hash_grow(sch, &q->clhash);

        *arg = (unsigned long)cl;
        return 0;

failure:
        qdisc_put_rtab(rtab);
        return err;
}

static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class*)arg;
        unsigned int qlen;

        if (cl->filters || cl->children || cl == &q->link)
                return -EBUSY;

        sch_tree_lock(sch);

        qlen = cl->q->q.qlen;
        qdisc_reset(cl->q);
        qdisc_tree_decrease_qlen(cl->q, qlen);

        if (cl->next_alive)
                cbq_deactivate_class(cl);

        if (q->tx_borrowed == cl)
                q->tx_borrowed = q->tx_class;
        if (q->tx_class == cl) {
                q->tx_class = NULL;
                q->tx_borrowed = NULL;
        }
#ifdef CONFIG_NET_CLS_ACT
        if (q->rx_class == cl)
                q->rx_class = NULL;
#endif

        cbq_unlink_class(cl);
        cbq_adjust_levels(cl->tparent);
        cl->defmap = 0;
        cbq_sync_defmap(cl);

        cbq_rmprio(q, cl);
        sch_tree_unlock(sch);

        BUG_ON(--cl->refcnt == 0);
        /*
         * This shouldn't happen: we "hold" one cops->get() when called
         * from tc_ctl_tclass; the destroy method is done from cops->put().
         */

        return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class *)arg;

        if (cl == NULL)
                cl = &q->link;

        return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
                                     u32 classid)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *p = (struct cbq_class*)parent;
        struct cbq_class *cl = cbq_class_lookup(q, classid);

        if (cl) {
                if (p && p->level <= cl->level)
                        return 0;
                cl->filters++;
                return (unsigned long)cl;
        }
        return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
        struct cbq_class *cl = (struct cbq_class*)arg;

        cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        struct hlist_node *n;
        unsigned h;

        if (arg->stop)
                return;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                        if (arg->count < arg->skip) {
                                arg->count++;
                                continue;
                        }
                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
                                arg->stop = 1;
                                return;
                        }
                        arg->count++;
                }
        }
}

static const struct Qdisc_class_ops cbq_class_ops = {
        .graft          =       cbq_graft,
        .leaf           =       cbq_leaf,
        .qlen_notify    =       cbq_qlen_notify,
        .get            =       cbq_get,
        .put            =       cbq_put,
        .change         =       cbq_change_class,
        .delete         =       cbq_delete,
        .walk           =       cbq_walk,
        .tcf_chain      =       cbq_find_tcf,
        .bind_tcf       =       cbq_bind_filter,
        .unbind_tcf     =       cbq_unbind_filter,
        .dump           =       cbq_dump_class,
        .dump_stats     =       cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
        .next           =       NULL,
        .cl_ops         =       &cbq_class_ops,
        .id             =       "cbq",
        .priv_size      =       sizeof(struct cbq_sched_data),
        .enqueue        =       cbq_enqueue,
        .dequeue        =       cbq_dequeue,
        .peek           =       qdisc_peek_dequeued,
        .drop           =       cbq_drop,
        .init           =       cbq_init,
        .reset          =       cbq_reset,
        .destroy        =       cbq_destroy,
        .change         =       NULL,
        .dump           =       cbq_dump,
        .dump_stats     =       cbq_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init cbq_module_init(void)
{
        return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
        unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");