/*
 * net/sched/sch_netem.c	Network emulator
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License.
 *
 * Many of the algorithms and ideas for this came from
 * NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.3"

/* Network Emulation Queuing algorithm.
   ====================================

   Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
                Network Emulation Tool"
            [2] Luigi Rizzo, DummyNet for FreeBSD

   ----------------------------------------------------------------

   This started out as a simple way to delay outgoing packets to
   test TCP but has grown to include most of the functionality
   of a full blown network emulator like NISTnet. It can delay
   packets and add random jitter (and correlation). The random
   distribution can be loaded from a table as well to provide
   normal, Pareto, or experimental curves. Packet loss,
   duplication, and reordering can also be emulated.

   This qdisc does not do classification; that can be handled by
   layering other disciplines. It does not need to do bandwidth
   control either, since that can be handled by using token
   bucket or other rate control.

   Correlated Loss Generator models

   Added generation of correlated loss according to the
   "Gilbert-Elliot" model, a 4-state markov model.

   References:
   [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
   [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
   and intuitive loss model for packet networks and its implementation
   in the Netem module in the Linux kernel", available in [1]

   Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
            Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
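
/*
 * Typical usage from userspace via iproute2's tc (the device name is
 * illustrative): delay packets by 100ms with 10ms of jitter and drop
 * 0.5% of them:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms loss 0.5%
 */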

struct netem_sched_data {
        struct Qdisc *qdisc;
        struct qdisc_watchdog watchdog;

        psched_tdiff_t latency;
        psched_tdiff_t jitter;

        u32 loss;
        u32 limit;
        u32 counter;
        u32 gap;
        u32 duplicate;
        u32 reorder;
        u32 corrupt;
        u32 rate;
        s32 packet_overhead;
        u32 cell_size;
        u32 cell_size_reciprocal;
        s32 cell_overhead;

        struct crndstate {
                u32 last;
                u32 rho;
        } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

        struct disttable {
                u32 size;
                s16 table[0];
        } *delay_dist;

        enum {
                CLG_RANDOM,
                CLG_4_STATES,
                CLG_GILB_ELL,
        } loss_model;

        /* Correlated Loss Generation models */
        struct clgstate {
                /* state of the Markov chain */
                u8 state;

                /* 4-states and Gilbert-Elliot models */
                u32 a1; /* p13 for 4-states or p for GE */
                u32 a2; /* p31 for 4-states or r for GE */
                u32 a3; /* p32 for 4-states or h for GE */
                u32 a4; /* p14 for 4-states or 1-k for GE */
                u32 a5; /* p23 used only in 4-states */
        } clg;

};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
        psched_time_t time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
        BUILD_BUG_ON(sizeof(skb->cb) <
                sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
        return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
        state->rho = rho;
        state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
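/*
 * The update below computes a fixed-point convex combination, roughly:
 *
 *	answer = ((2^32 - rho) * value + rho * last) / 2^32
 *
 * so rho == 0 yields independent uniform values, while rho close to
 * 2^32 makes each value closely track the previous one.
 */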
static u32 get_crandom(struct crndstate *state)
{
        u64 value, rho;
        unsigned long answer;

        if (state->rho == 0)    /* no correlation */
                return net_random();

        value = net_random();
        rho = (u64)state->rho + 1;
        answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
        state->last = answer;
        return answer;
}

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
        struct clgstate *clg = &q->clg;
        u32 rnd = net_random();

        /*
         * Compare rnd with the transition probabilities outgoing from
         * the current state, then decide the next state and whether the
         * next packet has to be transmitted or lost.
         * The four states correspond to:
         *   1 => successfully transmitted packets within a gap period
         *   4 => isolated losses within a gap period
         *   3 => lost packets within a burst period
         *   2 => successfully transmitted packets within a burst period
         */
        switch (clg->state) {
        case 1:
                if (rnd < clg->a4) {
                        clg->state = 4;
                        return true;
                } else if (clg->a4 < rnd && rnd < clg->a1) {
                        clg->state = 3;
                        return true;
                } else if (clg->a1 < rnd)
                        clg->state = 1;

                break;
        case 2:
                if (rnd < clg->a5) {
                        clg->state = 3;
                        return true;
                } else
                        clg->state = 2;

                break;
        case 3:
                if (rnd < clg->a3)
                        clg->state = 2;
                else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
                        clg->state = 1;
                        return true;
                } else if (clg->a2 + clg->a3 < rnd) {
                        clg->state = 3;
                        return true;
                }
                break;
        case 4:
                clg->state = 1;
                break;
        }

        return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Compares a random number with the transition probabilities outgoing
 * from the current state, then decides the next state. A second random
 * number is extracted and compared with the loss probability of the
 * current state to decide if the next packet will be transmitted or
 * lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
        struct clgstate *clg = &q->clg;

        switch (clg->state) {
        case 1:
                if (net_random() < clg->a1)
                        clg->state = 2;
                if (net_random() < clg->a4)
                        return true;
                break;
        case 2:
                if (net_random() < clg->a2)
                        clg->state = 1;
                if (clg->a3 > net_random())
                        return true;
        }

        return false;
}

static bool loss_event(struct netem_sched_data *q)
{
        switch (q->loss_model) {
        case CLG_RANDOM:
                /* Random packet drop 0 => none, ~0 => all */
                return q->loss && q->loss >= get_crandom(&q->loss_cor);

        case CLG_4_STATES:
                /* 4state loss model algorithm (used also for GI model)
                 * Extracts a value from the markov 4 state loss generator,
                 * if it is 1 drops a packet and if needed writes the event in
                 * the kernel logs
                 */
                return loss_4state(q);

        case CLG_GILB_ELL:
                /* Gilbert-Elliot loss model algorithm
                 * Extracts a value from the Gilbert-Elliot loss generator,
                 * if it is 1 drops a packet and if needed writes the event in
                 * the kernel logs
                 */
                return loss_gilb_ell(q);
        }

        return false;   /* not reached */
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma. Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
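/*
 * For example, with mu = 100ms, sigma = 10ms and no table loaded, the
 * result is uniform over [mu - sigma, mu + sigma). With a table, a
 * correlated uniform sample indexes the table and the signed entry t
 * scales the result to approximately mu + sigma * t / NETEM_DIST_SCALE;
 * the division below is split into quotient and remainder parts to
 * avoid overflow.
 */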
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
                                struct crndstate *state,
                                const struct disttable *dist)
{
        psched_tdiff_t x;
        long t;
        u32 rnd;

        if (sigma == 0)
                return mu;

        rnd = get_crandom(state);

        /* default uniform distribution */
        if (dist == NULL)
                return (rnd % (2*sigma)) - sigma + mu;

        t = dist->table[rnd % dist->size];
        x = (sigma % NETEM_DIST_SCALE) * t;
        if (x >= 0)
                x += NETEM_DIST_SCALE/2;
        else
                x -= NETEM_DIST_SCALE/2;

        return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

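/*
 * Convert a packet length into transmission time at the configured
 * rate, accounting for per-packet and per-cell overheads. As one
 * hedged example (not from the original source): cell_size = 48 with
 * cell_overhead = 5 approximates ATM framing, where each 48-byte
 * payload cell carries a 5-byte header.
 */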
static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
        u64 ticks;

        len += q->packet_overhead;

        if (q->cell_size) {
                u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

                if (len > cells * q->cell_size) /* extra cell needed for remainder */
                        cells++;
                len = cells * (q->cell_size + q->cell_overhead);
        }

        ticks = (u64)len * NSEC_PER_SEC;

        do_div(ticks, q->rate);
        return PSCHED_NS2TICKS(ticks);
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        /* We don't fill cb now as skb_unshare() may invalidate it */
        struct netem_skb_cb *cb;
        struct sk_buff *skb2;
        int ret;
        int count = 1;

        /* Random duplication */
        if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
                ++count;

        /* Drop packet? */
        if (loss_event(q))
                --count;

        if (count == 0) {
                sch->qstats.drops++;
                kfree_skb(skb);
                return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        }

        skb_orphan(skb);

        /*
         * If we need to duplicate packet, then re-insert at top of the
         * qdisc tree, since parent queuer expects that only one
         * skb will be queued.
         */
        if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
                struct Qdisc *rootq = qdisc_root(sch);
                u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
                q->duplicate = 0;

                qdisc_enqueue_root(skb2, rootq);
                q->duplicate = dupsave;
        }

        /*
         * Randomized packet corruption.
         * Make a copy if needed since we are modifying the data.
         * If the packet is going to be hardware checksummed, then
         * do it now in software before we mangle it.
         */
        if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
                if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
                    (skb->ip_summed == CHECKSUM_PARTIAL &&
                     skb_checksum_help(skb))) {
                        sch->qstats.drops++;
                        return NET_XMIT_DROP;
                }

                skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
        }

        cb = netem_skb_cb(skb);
        if (q->gap == 0 ||              /* not doing reordering */
            q->counter < q->gap ||      /* inside last reordering gap */
            q->reorder < get_crandom(&q->reorder_cor)) {
                psched_time_t now;
                psched_tdiff_t delay;

                delay = tabledist(q->latency, q->jitter,
                                  &q->delay_cor, q->delay_dist);

                now = psched_get_time();

                if (q->rate) {
                        struct sk_buff_head *list = &q->qdisc->q;

                        delay += packet_len_2_sched_time(skb->len, q);

                        if (!skb_queue_empty(list)) {
                                /*
                                 * Last packet in queue is reference point (now).
                                 * First packet in queue is already in flight;
                                 * calculate this time bonus and subtract it
                                 * from the delay.
                                 */
                                delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
                                now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
                        }
                }

                cb->time_to_send = now + delay;
                ++q->counter;
                ret = qdisc_enqueue(skb, q->qdisc);
        } else {
                /*
                 * Do re-ordering by putting one out of N packets at the front
                 * of the queue.
                 */
                cb->time_to_send = psched_get_time();
                q->counter = 0;

                __skb_queue_head(&q->qdisc->q, skb);
                q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
                q->qdisc->qstats.requeues++;
                ret = NET_XMIT_SUCCESS;
        }

        if (ret != NET_XMIT_SUCCESS) {
                if (net_xmit_drop_count(ret)) {
                        sch->qstats.drops++;
                        return ret;
                }
        }

        sch->q.qlen++;
        return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        unsigned int len = 0;

        if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
                sch->q.qlen--;
                sch->qstats.drops++;
        }
        return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        if (qdisc_is_throttled(sch))
                return NULL;

        skb = q->qdisc->ops->peek(q->qdisc);
        if (skb) {
                const struct netem_skb_cb *cb = netem_skb_cb(skb);
                psched_time_t now = psched_get_time();

                /* is it time to send this packet? */
                if (cb->time_to_send <= now) {
                        skb = qdisc_dequeue_peeked(q->qdisc);
                        if (unlikely(!skb))
                                return NULL;

#ifdef CONFIG_NET_CLS_ACT
                        /*
                         * If it's at ingress let's pretend the delay is
                         * from the network (tstamp will be updated).
                         */
                        if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
                                skb->tstamp.tv64 = 0;
#endif

                        sch->q.qlen--;
                        qdisc_unthrottled(sch);
                        qdisc_bstats_update(sch, skb);
                        return skb;
                }

                qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
        }

        return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->q.qlen = 0;
        qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
        if (d) {
                if (is_vmalloc_addr(d))
                        vfree(d);
                else
                        kfree(d);
        }
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
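/*
 * In practice the table is typically generated by userspace: iproute2
 * ships distribution files (normal, pareto, paretonormal) that tc
 * loads through the TCA_NETEM_DELAY_DIST attribute. (This note is
 * about userspace tooling, not something this code references.)
 */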
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        size_t n = nla_len(attr)/sizeof(__s16);
        const __s16 *data = nla_data(attr);
        spinlock_t *root_lock;
        struct disttable *d;
        int i;
        size_t s;

        if (n > NETEM_DIST_MAX)
                return -EINVAL;

        s = sizeof(struct disttable) + n * sizeof(s16);
        d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
        if (!d)
                d = vmalloc(s);
        if (!d)
                return -ENOMEM;

        d->size = n;
        for (i = 0; i < n; i++)
                d->table[i] = data[i];

        root_lock = qdisc_root_sleeping_lock(sch);

        spin_lock_bh(root_lock);
        swap(q->delay_dist, d);
        spin_unlock_bh(root_lock);

        dist_free(d);
        return 0;
}

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_corr *c = nla_data(attr);

        init_crandom(&q->delay_cor, c->delay_corr);
        init_crandom(&q->loss_cor, c->loss_corr);
        init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_reorder *r = nla_data(attr);

        q->reorder = r->probability;
        init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_corrupt *r = nla_data(attr);

        q->corrupt = r->probability;
        init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_rate *r = nla_data(attr);

        q->rate = r->rate;
        q->packet_overhead = r->packet_overhead;
        q->cell_size = r->cell_size;
        if (q->cell_size)
                q->cell_size_reciprocal = reciprocal_value(q->cell_size);
        q->cell_overhead = r->cell_overhead;
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct nlattr *la;
        int rem;

        nla_for_each_nested(la, attr, rem) {
                u16 type = nla_type(la);

                switch (type) {
                case NETEM_LOSS_GI: {
                        const struct tc_netem_gimodel *gi = nla_data(la);

                        if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
                                pr_info("netem: incorrect gi model size\n");
                                return -EINVAL;
                        }

                        q->loss_model = CLG_4_STATES;

                        q->clg.state = 1;
                        q->clg.a1 = gi->p13;
                        q->clg.a2 = gi->p31;
                        q->clg.a3 = gi->p32;
                        q->clg.a4 = gi->p14;
                        q->clg.a5 = gi->p23;
                        break;
                }

                case NETEM_LOSS_GE: {
                        const struct tc_netem_gemodel *ge = nla_data(la);

                        if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
                                pr_info("netem: incorrect ge model size\n");
                                return -EINVAL;
                        }

                        q->loss_model = CLG_GILB_ELL;
                        q->clg.state = 1;
                        q->clg.a1 = ge->p;
                        q->clg.a2 = ge->r;
                        q->clg.a3 = ge->h;
                        q->clg.a4 = ge->k1;
                        break;
                }

                default:
                        pr_info("netem: unknown loss type %u\n", type);
                        return -EINVAL;
                }
        }

        return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
        [TCA_NETEM_CORR]        = { .len = sizeof(struct tc_netem_corr) },
        [TCA_NETEM_REORDER]     = { .len = sizeof(struct tc_netem_reorder) },
        [TCA_NETEM_CORRUPT]     = { .len = sizeof(struct tc_netem_corrupt) },
        [TCA_NETEM_RATE]        = { .len = sizeof(struct tc_netem_rate) },
        [TCA_NETEM_LOSS]        = { .type = NLA_NESTED },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
                      const struct nla_policy *policy, int len)
{
        int nested_len = nla_len(nla) - NLA_ALIGN(len);

        if (nested_len < 0) {
                pr_info("netem: invalid attributes len %d\n", nested_len);
                return -EINVAL;
        }

        if (nested_len >= nla_attr_size(0))
                return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
                                 nested_len, policy);

        memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
        return 0;
}

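/*
 * Note: netem's TCA_OPTIONS payload is a struct tc_netem_qopt followed
 * by optional nested attributes, so netem_change() below uses
 * parse_attr() to skip the fixed-size header first, rather than
 * calling nla_parse_nested() directly.
 */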
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_NETEM_MAX + 1];
        struct tc_netem_qopt *qopt;
        int ret;

        if (opt == NULL)
                return -EINVAL;

        qopt = nla_data(opt);
        ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
        if (ret < 0)
                return ret;

        ret = fifo_set_limit(q->qdisc, qopt->limit);
        if (ret) {
                pr_info("netem: can't set fifo limit\n");
                return ret;
        }

        q->latency = qopt->latency;
        q->jitter = qopt->jitter;
        q->limit = qopt->limit;
        q->gap = qopt->gap;
        q->counter = 0;
        q->loss = qopt->loss;
        q->duplicate = qopt->duplicate;

        /* for compatibility with earlier versions,
         * if gap is set, need to assume 100% probability
         */
        if (q->gap)
                q->reorder = ~0;

        if (tb[TCA_NETEM_CORR])
                get_correlation(sch, tb[TCA_NETEM_CORR]);

        if (tb[TCA_NETEM_DELAY_DIST]) {
                ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
                if (ret)
                        return ret;
        }

        if (tb[TCA_NETEM_REORDER])
                get_reorder(sch, tb[TCA_NETEM_REORDER]);

        if (tb[TCA_NETEM_CORRUPT])
                get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

        if (tb[TCA_NETEM_RATE])
                get_rate(sch, tb[TCA_NETEM_RATE]);

        q->loss_model = CLG_RANDOM;
        if (tb[TCA_NETEM_LOSS])
                ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

        return ret;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues packets in order of their time_to_send timestamps.
 */
struct fifo_sched_data {
        u32 limit;
        psched_time_t oldest;
};

static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
        struct fifo_sched_data *q = qdisc_priv(sch);
        struct sk_buff_head *list = &sch->q;
        psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
        struct sk_buff *skb;

        if (likely(skb_queue_len(list) < q->limit)) {
                /* Optimize for add at tail */
                if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
                        q->oldest = tnext;
                        return qdisc_enqueue_tail(nskb, sch);
                }

                skb_queue_reverse_walk(list, skb) {
                        const struct netem_skb_cb *cb = netem_skb_cb(skb);

                        if (tnext >= cb->time_to_send)
                                break;
                }

                __skb_queue_after(list, skb, nskb);

                sch->qstats.backlog += qdisc_pkt_len(nskb);

                return NET_XMIT_SUCCESS;
        }

        return qdisc_reshape_fail(nskb, sch);
}
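
/*
 * Design note: the reverse walk in tfifo_enqueue() starts from the
 * tail because timestamps are usually non-decreasing, so the common
 * case is an O(1) insertion at or near the tail; only reordered
 * packets pay for a longer walk toward the head.
 */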

static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct fifo_sched_data *q = qdisc_priv(sch);

        if (opt) {
                struct tc_fifo_qopt *ctl = nla_data(opt);
                if (nla_len(opt) < sizeof(*ctl))
                        return -EINVAL;

                q->limit = ctl->limit;
        } else
                q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);

        q->oldest = PSCHED_PASTPERFECT;
        return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct fifo_sched_data *q = qdisc_priv(sch);
        struct tc_fifo_qopt opt = { .limit = q->limit };

        NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
        return skb->len;

nla_put_failure:
        return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
        .id             = "tfifo",
        .priv_size      = sizeof(struct fifo_sched_data),
        .enqueue        = tfifo_enqueue,
        .dequeue        = qdisc_dequeue_head,
        .peek           = qdisc_peek_head,
        .drop           = qdisc_queue_drop,
        .init           = tfifo_init,
        .reset          = qdisc_reset_queue,
        .change         = tfifo_init,
        .dump           = tfifo_dump,
};

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        int ret;

        if (!opt)
                return -EINVAL;

        qdisc_watchdog_init(&q->watchdog, sch);

        q->loss_model = CLG_RANDOM;
        q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
                                     TC_H_MAKE(sch->handle, 1));
        if (!q->qdisc) {
                pr_notice("netem: qdisc create tfifo qdisc failed\n");
                return -ENOMEM;
        }

        ret = netem_change(sch, opt);
        if (ret) {
                pr_info("netem: change failed\n");
                qdisc_destroy(q->qdisc);
        }
        return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_cancel(&q->watchdog);
        qdisc_destroy(q->qdisc);
        dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
                           struct sk_buff *skb)
{
        struct nlattr *nest;

        nest = nla_nest_start(skb, TCA_NETEM_LOSS);
        if (nest == NULL)
                goto nla_put_failure;

        switch (q->loss_model) {
        case CLG_RANDOM:
                /* legacy loss model */
                nla_nest_cancel(skb, nest);
                return 0;       /* no data */

        case CLG_4_STATES: {
                struct tc_netem_gimodel gi = {
                        .p13 = q->clg.a1,
                        .p31 = q->clg.a2,
                        .p32 = q->clg.a3,
                        .p14 = q->clg.a4,
                        .p23 = q->clg.a5,
                };

                NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
                break;
        }
        case CLG_GILB_ELL: {
                struct tc_netem_gemodel ge = {
                        .p = q->clg.a1,
                        .r = q->clg.a2,
                        .h = q->clg.a3,
                        .k1 = q->clg.a4,
                };

                NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
                break;
        }
        }

        nla_nest_end(skb, nest);
        return 0;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        const struct netem_sched_data *q = qdisc_priv(sch);
        struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
        struct tc_netem_qopt qopt;
        struct tc_netem_corr cor;
        struct tc_netem_reorder reorder;
        struct tc_netem_corrupt corrupt;
        struct tc_netem_rate rate;

        qopt.latency = q->latency;
        qopt.jitter = q->jitter;
        qopt.limit = q->limit;
        qopt.loss = q->loss;
        qopt.gap = q->gap;
        qopt.duplicate = q->duplicate;
        NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

        cor.delay_corr = q->delay_cor.rho;
        cor.loss_corr = q->loss_cor.rho;
        cor.dup_corr = q->dup_cor.rho;
        NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

        reorder.probability = q->reorder;
        reorder.correlation = q->reorder_cor.rho;
        NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

        corrupt.probability = q->corrupt;
        corrupt.correlation = q->corrupt_cor.rho;
        NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

        rate.rate = q->rate;
        rate.packet_overhead = q->packet_overhead;
        rate.cell_size = q->cell_size;
        rate.cell_overhead = q->cell_overhead;
        NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

        if (dump_loss_model(q, skb) != 0)
                goto nla_put_failure;

        return nla_nest_end(skb, nla);

nla_put_failure:
        nlmsg_trim(skb, nla);
        return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
                            struct sk_buff *skb, struct tcmsg *tcm)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        if (cl != 1)    /* only one class */
                return -ENOENT;

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;

        return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                       struct Qdisc **old)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        sch_tree_lock(sch);
        *old = q->qdisc;
        q->qdisc = new;
        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
        qdisc_reset(*old);
        sch_tree_unlock(sch);

        return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops netem_class_ops = {
        .graft          = netem_graft,
        .leaf           = netem_leaf,
        .get            = netem_get,
        .put            = netem_put,
        .walk           = netem_walk,
        .dump           = netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
        .id             = "netem",
        .cl_ops         = &netem_class_ops,
        .priv_size      = sizeof(struct netem_sched_data),
        .enqueue        = netem_enqueue,
        .dequeue        = netem_dequeue,
        .peek           = qdisc_peek_dequeued,
        .drop           = netem_drop,
        .init           = netem_init,
        .reset          = netem_reset,
        .destroy        = netem_destroy,
        .change         = netem_change,
        .dump           = netem_dump,
        .owner          = THIS_MODULE,
};


static int __init netem_module_init(void)
{
        pr_info("netem: version " VERSION "\n");
        return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
        unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");