pkt_sched: Use qdisc->ops->peek() instead of ->dequeue() & ->requeue()
net/sched/sch_tbf.c

/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.



	Actually, QoS requires two TBFs to be applied to a data stream.
	One of them controls the steady-state burst size, the other one,
	with rate P (peak rate) and depth M (equal to link MTU),
	limits bursts at a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that with depth B, the maximal rate is

	R_crit = B*HZ

	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak-rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/
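
/*
 * Worked example of the reshaping latency above (illustrative numbers,
 * not taken from this file): with R = 125000 bytes/s (1 Mbit/s),
 * B = 10000 bytes, no peak rate and a backlog limit L = 30000 bytes,
 *
 *	lat = (L-B)/R = 20000/125000 = 0.16 s
 *
 * From user space this qdisc is normally set up with tc; a roughly
 * equivalent command (see tc-tbf(8) for the exact option semantics)
 * would be:
 *
 *	tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb latency 160ms
 */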

struct tbf_sched_data
{
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	u32		mtu;
	u32		max_size;
	struct qdisc_rate_table	*R_tab;
	struct qdisc_rate_table	*P_tab;

/* Variables */
	long	tokens;			/* Current number of B tokens */
	long	ptokens;		/* Current number of P tokens */
	psched_time_t	t_c;		/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)

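/*
 * Enqueue: a packet longer than max_size could never gather enough
 * tokens, so it is handed to qdisc_reshape_fail(); otherwise it is
 * queued in the inner qdisc and, on success, the queue length and
 * byte/packet counters are updated.
 */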
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size)
		return qdisc_reshape_fail(skb, sch);

	ret = qdisc_enqueue(skb, q->qdisc);
	if (ret != 0) {
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	sch->bstats.packets++;
	return 0;
}

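/*
 * Requeue: give a previously dequeued packet back to the inner qdisc
 * and re-account it in the queue length and requeue statistics.
 */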
static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
	}

	return ret;
}

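/*
 * Drop: ask the inner qdisc to drop one packet, if it supports ->drop,
 * and account the drop here; returns the dropped length or 0.
 */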
static unsigned int tbf_drop(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

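/*
 * Dequeue: peek at the head packet of the inner qdisc (the ->peek()
 * conversion named in the commit above), so the packet is only removed
 * once it is known to be transmittable.  Tokens accrued since the last
 * check-point t_c are added and clamped to the bucket depth (q->buffer,
 * and q->mtu for the peak bucket), then the packet's cost from the rate
 * table is subtracted.  If both budgets stay non-negative, the packet
 * is really dequeued and the new state committed; otherwise the watchdog
 * is armed for the moment the missing tokens will have accumulated.
 */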
static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		psched_time_t now;
		long toks;
		long ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = psched_get_time();
		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);

		if (q->P_tab) {
			ptoks = toks + q->ptokens;
			if (ptoks > (long)q->mtu)
				ptoks = q->mtu;
			ptoks -= L2T_P(q, len);
		}
		toks += q->tokens;
		if (toks > (long)q->buffer)
			toks = q->buffer;
		toks -= L2T(q, len);

		if ((toks|ptoks) >= 0) {
			skb = q->qdisc->dequeue(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog,
					now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but, however, this is wrong in principle.
		   We MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		sch->qstats.overlimits++;
	}
	return NULL;
}

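/*
 * Reset: flush the inner qdisc, refill both buckets to their full
 * depth, restart the check-point clock and cancel a pending watchdog.
 */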
static void tbf_reset(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	q->t_c = psched_get_time();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

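/* Netlink attribute policy for the TCA_TBF_* options. */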
static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

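/*
 * Change/configure: parse the TCA_TBF_* attributes, look up the rate
 * (and optional peak rate) tables and derive max_size - the largest
 * packet whose token cost still fits into the configured buffer (and
 * into mtu for the peak bucket): the scans below find the first rate
 * table slot whose cost exceeds the budget and take the length just
 * below it.  A positive limit also (re)creates the default bfifo child.
 */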
static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size, n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer) break;
	max_size = (n << qopt->rate.cell_log) - 1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu) break;
		size = (n << qopt->peakrate.cell_log) - 1;
		if (size < max_size) max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(xchg(&q->qdisc, child));
	}
	q->limit = qopt->limit;
	q->mtu = qopt->mtu;
	q->max_size = max_size;
	q->buffer = qopt->buffer;
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	rtab = xchg(&q->R_tab, rtab);
	ptab = xchg(&q->P_tab, ptab);
	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

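/*
 * Init: an options attribute is mandatory; record the current time as
 * the check-point, initialise the watchdog, start with noop_qdisc as
 * the child and let tbf_change() apply the actual parameters.
 */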
static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (opt == NULL)
		return -EINVAL;

	q->t_c = psched_get_time();
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);

	if (q->P_tab)
		qdisc_put_rtab(q->P_tab);
	if (q->R_tab)
		qdisc_put_rtab(q->R_tab);

	qdisc_destroy(q->qdisc);
}

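/*
 * Dump: report the current parameters to user space as a nested
 * TCA_OPTIONS/TCA_TBF_PARMS attribute; a zeroed peakrate indicates
 * that the second bucket is not in use.
 */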
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	opt.rate = q->R_tab->rate;
	if (q->P_tab)
		opt.peakrate = q->P_tab->rate;
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = q->mtu;
	opt.buffer = q->buffer;
	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

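/*
 * TBF exposes exactly one class (minor 1) holding the inner qdisc, so
 * the class operations below are mostly trivial: graft swaps the child,
 * leaf/get/put address that single class, and change/delete are not
 * supported.
 */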
static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int tbf_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

static const struct Qdisc_class_ops tbf_class_ops =
{
	.graft		= tbf_graft,
	.leaf		= tbf_leaf,
	.get		= tbf_get,
	.put		= tbf_put,
	.change		= tbf_change_class,
	.delete		= tbf_delete,
	.walk		= tbf_walk,
	.tcf_chain	= tbf_find_tcf,
	.dump		= tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &tbf_class_ops,
	.id		= "tbf",
	.priv_size	= sizeof(struct tbf_sched_data),
	.enqueue	= tbf_enqueue,
	.dequeue	= tbf_dequeue,
	.requeue	= tbf_requeue,
	.drop		= tbf_drop,
	.init		= tbf_init,
	.reset		= tbf_reset,
	.destroy	= tbf_destroy,
	.change		= tbf_change,
	.dump		= tbf_dump,
	.owner		= THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");