1 /*
2 * net/sched/sch_netem.c Network emulator
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License.
8 *
9 * Many of the algorithms and ideas for this came from
10 * NIST Net which is not copyrighted.
11 *
12 * Authors: Stephen Hemminger <shemminger@osdl.org>
13 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
14 */
15
16 #include <linux/mm.h>
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/skbuff.h>
23 #include <linux/vmalloc.h>
24 #include <linux/rtnetlink.h>
25 #include <linux/reciprocal_div.h>
26
27 #include <net/netlink.h>
28 #include <net/pkt_sched.h>
29
30 #define VERSION "1.3"
31
32 /* Network Emulation Queuing algorithm.
33 ====================================
34
	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
37 [2] Luigi Rizzo, DummyNet for FreeBSD
38
39 ----------------------------------------------------------------
40
41 This started out as a simple way to delay outgoing packets to
42 test TCP but has grown to include most of the functionality
43 of a full blown network emulator like NISTnet. It can delay
44 packets and add random jitter (and correlation). The random
45 distribution can be loaded from a table as well to provide
46 normal, Pareto, or experimental curves. Packet loss,
47 duplication, and reordering can also be emulated.
48
	This qdisc does not do classification; that can be handled by
	layering other disciplines.  It does not need to do bandwidth
	control either, since that can be handled by using token
	bucket or other rate control.
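
	Example (illustrative only; "eth0" and the numbers below are
	arbitrary, iproute2's tc is the usual frontend for this qdisc):

		tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.1%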
53
	Correlated Loss Generator models

	Added generation of correlated loss according to a 4-state Markov
	chain (the GI model) and to the Gilbert-Elliott model.
58
59 References:
60 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
61 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
62 and intuitive loss model for packet networks and its implementation
63 in the Netem module in the Linux kernel", available in [1]
64
	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
66 Fabio Ludovici <fabio.ludovici at yahoo.it>
67 */
68
69 struct netem_sched_data {
70 struct Qdisc *qdisc;
71 struct qdisc_watchdog watchdog;
72
73 psched_tdiff_t latency;
74 psched_tdiff_t jitter;
75
76 u32 loss;
77 u32 limit;
78 u32 counter;
79 u32 gap;
80 u32 duplicate;
81 u32 reorder;
82 u32 corrupt;
83 u32 rate;
84 s32 packet_overhead;
85 u32 cell_size;
86 u32 cell_size_reciprocal;
87 s32 cell_overhead;
88
89 struct crndstate {
90 u32 last;
91 u32 rho;
92 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
93
94 struct disttable {
95 u32 size;
96 s16 table[0];
97 } *delay_dist;
98
	enum {
		CLG_RANDOM,	/* independent (uncorrelated) random losses */
		CLG_4_STATES,	/* 4-state Markov chain (GI model) */
		CLG_GILB_ELL,	/* Gilbert-Elliott model */
	} loss_model;
104
105 /* Correlated Loss Generation models */
106 struct clgstate {
107 /* state of the Markov chain */
108 u8 state;
109
110 /* 4-states and Gilbert-Elliot models */
111 u32 a1; /* p13 for 4-states or p for GE */
112 u32 a2; /* p31 for 4-states or r for GE */
113 u32 a3; /* p32 for 4-states or h for GE */
114 u32 a4; /* p14 for 4-states or 1-k for GE */
115 u32 a5; /* p23 used only in 4-states */
116 } clg;
117
118 };
119
120 /* Time stamp put into socket buffer control block */
121 struct netem_skb_cb {
122 psched_time_t time_to_send;
123 };
124
125 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
126 {
127 BUILD_BUG_ON(sizeof(skb->cb) <
128 sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
129 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
130 }
131
132 /* init_crandom - initialize correlated random number generator
133 * Use entropy source for initial seed.
134 */
135 static void init_crandom(struct crndstate *state, unsigned long rho)
136 {
137 state->rho = rho;
138 state->last = net_random();
139 }
140
141 /* get_crandom - correlated random number generator
142 * Next number depends on last value.
143 * rho is scaled to avoid floating point.
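 *
 * A sketch of the recurrence implemented below, with rho read as a
 * fixed-point fraction of 2^32:
 *
 *	next = ((2^32 - rho) * uniform + rho * last) >> 32
 *
 * so rho == 0 yields independent values, while values of rho close to
 * 2^32 track the previous output ever more closely.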
144 */
145 static u32 get_crandom(struct crndstate *state)
146 {
147 u64 value, rho;
148 unsigned long answer;
149
150 if (state->rho == 0) /* no correlation */
151 return net_random();
152
153 value = net_random();
154 rho = (u64)state->rho + 1;
155 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
156 state->last = answer;
157 return answer;
158 }
159
160 /* loss_4state - 4-state model loss generator
161 * Generates losses according to the 4-state Markov chain adopted in
162 * the GI (General and Intuitive) loss model.
163 */
164 static bool loss_4state(struct netem_sched_data *q)
165 {
166 struct clgstate *clg = &q->clg;
167 u32 rnd = net_random();
168
169 /*
	 * Compare rnd with the transition probabilities out of the
	 * current state, then decide the next state and whether the
	 * next packet has to be transmitted or lost.
173 * The four states correspond to:
174 * 1 => successfully transmitted packets within a gap period
175 * 4 => isolated losses within a gap period
176 * 3 => lost packets within a burst period
177 * 2 => successfully transmitted packets within a burst period
178 */
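	/*
	 * Note: the a1..a5 thresholds are the GI transition probabilities
	 * (p13, p31, p32, p14, p23) expressed as u32 values where ~0U
	 * corresponds to probability 1.0, hence the direct comparisons
	 * with rnd.
	 */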
179 switch (clg->state) {
180 case 1:
181 if (rnd < clg->a4) {
182 clg->state = 4;
183 return true;
184 } else if (clg->a4 < rnd && rnd < clg->a1) {
185 clg->state = 3;
186 return true;
187 } else if (clg->a1 < rnd)
188 clg->state = 1;
189
190 break;
191 case 2:
192 if (rnd < clg->a5) {
193 clg->state = 3;
194 return true;
195 } else
196 clg->state = 2;
197
198 break;
199 case 3:
200 if (rnd < clg->a3)
201 clg->state = 2;
202 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
203 clg->state = 1;
204 return true;
205 } else if (clg->a2 + clg->a3 < rnd) {
206 clg->state = 3;
207 return true;
208 }
209 break;
210 case 4:
211 clg->state = 1;
212 break;
213 }
214
215 return false;
216 }
217
/* loss_gilb_ell - Gilbert-Elliott model loss generator
 * Generates losses according to the Gilbert-Elliott loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between a random number and the transition
223 * probabilities outgoing from the current state, then decides the
224 * next state. A second random number is extracted and the comparison
225 * with the loss probability of the current state decides if the next
226 * packet will be transmitted or lost.
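 *
 * Here a1 = p (good -> bad), a2 = r (bad -> good), a3 = h and
 * a4 = 1-k, all u32 probabilities where ~0U corresponds to 1.0
 * (see the clgstate field comments above).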
227 */
228 static bool loss_gilb_ell(struct netem_sched_data *q)
229 {
230 struct clgstate *clg = &q->clg;
231
	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (clg->a3 > net_random())
			return true;
	}
244
245 return false;
246 }
247
248 static bool loss_event(struct netem_sched_data *q)
249 {
250 switch (q->loss_model) {
251 case CLG_RANDOM:
252 /* Random packet drop 0 => none, ~0 => all */
253 return q->loss && q->loss >= get_crandom(&q->loss_cor);
254
255 case CLG_4_STATES:
		/* 4-state loss model algorithm (also used for the GI model):
		 * ask the Markov 4-state loss generator whether the next
		 * packet should be dropped.
		 */
261 return loss_4state(q);
262
263 case CLG_GILB_ELL:
		/* Gilbert-Elliott loss model algorithm:
		 * ask the Gilbert-Elliott loss generator whether the next
		 * packet should be dropped.
		 */
269 return loss_gilb_ell(q);
270 }
271
272 return false; /* not reached */
273 }
274
275
276 /* tabledist - return a pseudo-randomly distributed value with mean mu and
277 * std deviation sigma. Uses table lookup to approximate the desired
278 * distribution, and a uniformly-distributed pseudo-random source.
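 *
 * A sketch of the fixed-point math below: the table holds samples t of
 * the target distribution scaled by NETEM_DIST_SCALE, so the result is
 * roughly mu + t * sigma / NETEM_DIST_SCALE, split into two terms to
 * avoid overflow and rounded half away from zero.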
279 */
280 static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
281 struct crndstate *state,
282 const struct disttable *dist)
283 {
284 psched_tdiff_t x;
285 long t;
286 u32 rnd;
287
288 if (sigma == 0)
289 return mu;
290
291 rnd = get_crandom(state);
292
293 /* default uniform distribution */
294 if (dist == NULL)
295 return (rnd % (2*sigma)) - sigma + mu;
296
297 t = dist->table[rnd % dist->size];
298 x = (sigma % NETEM_DIST_SCALE) * t;
299 if (x >= 0)
300 x += NETEM_DIST_SCALE/2;
301 else
302 x -= NETEM_DIST_SCALE/2;
303
304 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
305 }
306
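/* Transmission time of a packet on the emulated link: length plus
 * per-packet overhead, optionally rounded up to whole cells, divided
 * by the configured rate in bytes per second, converted to scheduler
 * ticks.
 */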
307 static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
308 {
309 u64 ticks;
310
311 len += q->packet_overhead;
312
313 if (q->cell_size) {
314 u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
315
316 if (len > cells * q->cell_size) /* extra cell needed for remainder */
317 cells++;
318 len = cells * (q->cell_size + q->cell_overhead);
319 }
320
321 ticks = (u64)len * NSEC_PER_SEC;
322
323 do_div(ticks, q->rate);
324 return PSCHED_NS2TICKS(ticks);
325 }
326
327 /*
328 * Insert one skb into qdisc.
329 * Note: parent depends on return value to account for queue length.
330 * NET_XMIT_DROP: queue length didn't change.
331 * NET_XMIT_SUCCESS: one skb was queued.
332 */
333 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
334 {
335 struct netem_sched_data *q = qdisc_priv(sch);
336 /* We don't fill cb now as skb_unshare() may invalidate it */
337 struct netem_skb_cb *cb;
338 struct sk_buff *skb2;
339 int ret;
340 int count = 1;
341
342 /* Random duplication */
343 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
344 ++count;
345
346 /* Drop packet? */
347 if (loss_event(q))
348 --count;
349
350 if (count == 0) {
351 sch->qstats.drops++;
352 kfree_skb(skb);
353 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
354 }
355
356 skb_orphan(skb);
357
358 /*
359 * If we need to duplicate packet, then re-insert at top of the
360 * qdisc tree, since parent queuer expects that only one
361 * skb will be queued.
362 */
363 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
364 struct Qdisc *rootq = qdisc_root(sch);
365 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
366 q->duplicate = 0;
367
368 qdisc_enqueue_root(skb2, rootq);
369 q->duplicate = dupsave;
370 }
371
372 /*
373 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If the packet is going to be hardware checksummed, then
	 * do the checksum now in software before we mangle it.
377 */
378 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
379 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
380 (skb->ip_summed == CHECKSUM_PARTIAL &&
381 skb_checksum_help(skb))) {
382 sch->qstats.drops++;
383 return NET_XMIT_DROP;
384 }
385
386 skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
387 }
388
389 cb = netem_skb_cb(skb);
390 if (q->gap == 0 || /* not doing reordering */
391 q->counter < q->gap || /* inside last reordering gap */
392 q->reorder < get_crandom(&q->reorder_cor)) {
393 psched_time_t now;
394 psched_tdiff_t delay;
395
396 delay = tabledist(q->latency, q->jitter,
397 &q->delay_cor, q->delay_dist);
398
399 now = psched_get_time();
400
401 if (q->rate) {
402 struct sk_buff_head *list = &q->qdisc->q;
403
404 delay += packet_len_2_sched_time(skb->len, q);
405
406 if (!skb_queue_empty(list)) {
407 /*
408 * Last packet in queue is reference point (now).
409 * First packet in queue is already in flight,
				 * calculate this time bonus and subtract
411 * from delay.
412 */
413 delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
414 now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
415 }
416 }
417
418 cb->time_to_send = now + delay;
419 ++q->counter;
420 ret = qdisc_enqueue(skb, q->qdisc);
421 } else {
422 /*
423 * Do re-ordering by putting one out of N packets at the front
424 * of the queue.
425 */
426 cb->time_to_send = psched_get_time();
427 q->counter = 0;
428
429 __skb_queue_head(&q->qdisc->q, skb);
430 q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
431 q->qdisc->qstats.requeues++;
432 ret = NET_XMIT_SUCCESS;
433 }
434
435 if (ret != NET_XMIT_SUCCESS) {
436 if (net_xmit_drop_count(ret)) {
437 sch->qstats.drops++;
438 return ret;
439 }
440 }
441
442 sch->q.qlen++;
443 return NET_XMIT_SUCCESS;
444 }
445
446 static unsigned int netem_drop(struct Qdisc *sch)
447 {
448 struct netem_sched_data *q = qdisc_priv(sch);
449 unsigned int len = 0;
450
451 if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
452 sch->q.qlen--;
453 sch->qstats.drops++;
454 }
455 return len;
456 }
457
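/* Hand the next packet to the device once its time_to_send has been
 * reached; otherwise leave it queued and (re)arm the qdisc watchdog to
 * retry the dequeue at that time.
 */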
458 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
459 {
460 struct netem_sched_data *q = qdisc_priv(sch);
461 struct sk_buff *skb;
462
463 if (qdisc_is_throttled(sch))
464 return NULL;
465
466 skb = q->qdisc->ops->peek(q->qdisc);
467 if (skb) {
468 const struct netem_skb_cb *cb = netem_skb_cb(skb);
469 psched_time_t now = psched_get_time();
470
		/* if the scheduled send time has been reached, dequeue now */
472 if (cb->time_to_send <= now) {
473 skb = qdisc_dequeue_peeked(q->qdisc);
474 if (unlikely(!skb))
475 return NULL;
476
477 #ifdef CONFIG_NET_CLS_ACT
478 /*
479 * If it's at ingress let's pretend the delay is
480 * from the network (tstamp will be updated).
481 */
482 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
483 skb->tstamp.tv64 = 0;
484 #endif
485
486 sch->q.qlen--;
487 qdisc_unthrottled(sch);
488 qdisc_bstats_update(sch, skb);
489 return skb;
490 }
491
492 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
493 }
494
495 return NULL;
496 }
497
498 static void netem_reset(struct Qdisc *sch)
499 {
500 struct netem_sched_data *q = qdisc_priv(sch);
501
502 qdisc_reset(q->qdisc);
503 sch->q.qlen = 0;
504 qdisc_watchdog_cancel(&q->watchdog);
505 }
506
507 static void dist_free(struct disttable *d)
508 {
509 if (d) {
510 if (is_vmalloc_addr(d))
511 vfree(d);
512 else
513 kfree(d);
514 }
515 }
516
517 /*
518 * Distribution data is a variable size payload containing
519 * signed 16 bit values.
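 * These are samples of the desired delay distribution, scaled by
 * NETEM_DIST_SCALE and typically generated by iproute2's maketable;
 * tabledist() indexes the table with a (correlated) random value.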
520 */
521 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
522 {
523 struct netem_sched_data *q = qdisc_priv(sch);
524 size_t n = nla_len(attr)/sizeof(__s16);
525 const __s16 *data = nla_data(attr);
526 spinlock_t *root_lock;
527 struct disttable *d;
528 int i;
529 size_t s;
530
531 if (n > NETEM_DIST_MAX)
532 return -EINVAL;
533
534 s = sizeof(struct disttable) + n * sizeof(s16);
535 d = kmalloc(s, GFP_KERNEL);
536 if (!d)
537 d = vmalloc(s);
538 if (!d)
539 return -ENOMEM;
540
541 d->size = n;
542 for (i = 0; i < n; i++)
543 d->table[i] = data[i];
544
545 root_lock = qdisc_root_sleeping_lock(sch);
546
547 spin_lock_bh(root_lock);
548 dist_free(q->delay_dist);
549 q->delay_dist = d;
550 spin_unlock_bh(root_lock);
551 return 0;
552 }
553
554 static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
555 {
556 struct netem_sched_data *q = qdisc_priv(sch);
557 const struct tc_netem_corr *c = nla_data(attr);
558
559 init_crandom(&q->delay_cor, c->delay_corr);
560 init_crandom(&q->loss_cor, c->loss_corr);
561 init_crandom(&q->dup_cor, c->dup_corr);
562 }
563
564 static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
565 {
566 struct netem_sched_data *q = qdisc_priv(sch);
567 const struct tc_netem_reorder *r = nla_data(attr);
568
569 q->reorder = r->probability;
570 init_crandom(&q->reorder_cor, r->correlation);
571 }
572
573 static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
574 {
575 struct netem_sched_data *q = qdisc_priv(sch);
576 const struct tc_netem_corrupt *r = nla_data(attr);
577
578 q->corrupt = r->probability;
579 init_crandom(&q->corrupt_cor, r->correlation);
580 }
581
582 static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
583 {
584 struct netem_sched_data *q = qdisc_priv(sch);
585 const struct tc_netem_rate *r = nla_data(attr);
586
587 q->rate = r->rate;
588 q->packet_overhead = r->packet_overhead;
589 q->cell_size = r->cell_size;
590 if (q->cell_size)
591 q->cell_size_reciprocal = reciprocal_value(q->cell_size);
592 q->cell_overhead = r->cell_overhead;
593 }
594
595 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
596 {
597 struct netem_sched_data *q = qdisc_priv(sch);
598 const struct nlattr *la;
599 int rem;
600
601 nla_for_each_nested(la, attr, rem) {
602 u16 type = nla_type(la);
603
604 switch(type) {
605 case NETEM_LOSS_GI: {
606 const struct tc_netem_gimodel *gi = nla_data(la);
607
608 if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
609 pr_info("netem: incorrect gi model size\n");
610 return -EINVAL;
611 }
612
613 q->loss_model = CLG_4_STATES;
614
615 q->clg.state = 1;
616 q->clg.a1 = gi->p13;
617 q->clg.a2 = gi->p31;
618 q->clg.a3 = gi->p32;
619 q->clg.a4 = gi->p14;
620 q->clg.a5 = gi->p23;
621 break;
622 }
623
624 case NETEM_LOSS_GE: {
625 const struct tc_netem_gemodel *ge = nla_data(la);
626
627 if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
628 pr_info("netem: incorrect gi model size\n");
629 return -EINVAL;
630 }
631
632 q->loss_model = CLG_GILB_ELL;
633 q->clg.state = 1;
634 q->clg.a1 = ge->p;
635 q->clg.a2 = ge->r;
636 q->clg.a3 = ge->h;
637 q->clg.a4 = ge->k1;
638 break;
639 }
640
641 default:
642 pr_info("netem: unknown loss type %u\n", type);
643 return -EINVAL;
644 }
645 }
646
647 return 0;
648 }
649
650 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
651 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
652 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
653 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
654 [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
655 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
656 };
657
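/* netem options start with a struct tc_netem_qopt header for
 * compatibility with older userspace; optional netlink attributes
 * follow it, so parsing must first skip the aligned header.
 */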
658 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
659 const struct nla_policy *policy, int len)
660 {
661 int nested_len = nla_len(nla) - NLA_ALIGN(len);
662
663 if (nested_len < 0) {
664 pr_info("netem: invalid attributes len %d\n", nested_len);
665 return -EINVAL;
666 }
667
668 if (nested_len >= nla_attr_size(0))
669 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
670 nested_len, policy);
671
672 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
673 return 0;
674 }
675
676 /* Parse netlink message to set options */
677 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
678 {
679 struct netem_sched_data *q = qdisc_priv(sch);
680 struct nlattr *tb[TCA_NETEM_MAX + 1];
681 struct tc_netem_qopt *qopt;
682 int ret;
683
684 if (opt == NULL)
685 return -EINVAL;
686
687 qopt = nla_data(opt);
688 ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
689 if (ret < 0)
690 return ret;
691
692 ret = fifo_set_limit(q->qdisc, qopt->limit);
693 if (ret) {
694 pr_info("netem: can't set fifo limit\n");
695 return ret;
696 }
697
698 q->latency = qopt->latency;
699 q->jitter = qopt->jitter;
700 q->limit = qopt->limit;
701 q->gap = qopt->gap;
702 q->counter = 0;
703 q->loss = qopt->loss;
704 q->duplicate = qopt->duplicate;
705
706 /* for compatibility with earlier versions.
707 * if gap is set, need to assume 100% probability
708 */
709 if (q->gap)
710 q->reorder = ~0;
711
712 if (tb[TCA_NETEM_CORR])
713 get_correlation(sch, tb[TCA_NETEM_CORR]);
714
715 if (tb[TCA_NETEM_DELAY_DIST]) {
716 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
717 if (ret)
718 return ret;
719 }
720
721 if (tb[TCA_NETEM_REORDER])
722 get_reorder(sch, tb[TCA_NETEM_REORDER]);
723
724 if (tb[TCA_NETEM_CORRUPT])
725 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
726
727 if (tb[TCA_NETEM_RATE])
728 get_rate(sch, tb[TCA_NETEM_RATE]);
729
730 q->loss_model = CLG_RANDOM;
731 if (tb[TCA_NETEM_LOSS])
732 ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
733
734 return ret;
735 }
736
737 /*
 * Special case version of FIFO queue for use by netem.
 * It keeps packets ordered by the time_to_send stamp in the skb
 * control block, inserting from the tail since most packets are
 * scheduled after those already queued.
740 */
741 struct fifo_sched_data {
742 u32 limit;
743 psched_time_t oldest;
744 };
745
746 static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
747 {
748 struct fifo_sched_data *q = qdisc_priv(sch);
749 struct sk_buff_head *list = &sch->q;
750 psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
751 struct sk_buff *skb;
752
753 if (likely(skb_queue_len(list) < q->limit)) {
754 /* Optimize for add at tail */
755 if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
756 q->oldest = tnext;
757 return qdisc_enqueue_tail(nskb, sch);
758 }
759
760 skb_queue_reverse_walk(list, skb) {
761 const struct netem_skb_cb *cb = netem_skb_cb(skb);
762
763 if (tnext >= cb->time_to_send)
764 break;
765 }
766
767 __skb_queue_after(list, skb, nskb);
768
769 sch->qstats.backlog += qdisc_pkt_len(nskb);
770
771 return NET_XMIT_SUCCESS;
772 }
773
774 return qdisc_reshape_fail(nskb, sch);
775 }
776
777 static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
778 {
779 struct fifo_sched_data *q = qdisc_priv(sch);
780
781 if (opt) {
782 struct tc_fifo_qopt *ctl = nla_data(opt);
783 if (nla_len(opt) < sizeof(*ctl))
784 return -EINVAL;
785
786 q->limit = ctl->limit;
787 } else
788 q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
789
790 q->oldest = PSCHED_PASTPERFECT;
791 return 0;
792 }
793
794 static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
795 {
796 struct fifo_sched_data *q = qdisc_priv(sch);
797 struct tc_fifo_qopt opt = { .limit = q->limit };
798
799 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
800 return skb->len;
801
802 nla_put_failure:
803 return -1;
804 }
805
806 static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
807 .id = "tfifo",
808 .priv_size = sizeof(struct fifo_sched_data),
809 .enqueue = tfifo_enqueue,
810 .dequeue = qdisc_dequeue_head,
811 .peek = qdisc_peek_head,
812 .drop = qdisc_queue_drop,
813 .init = tfifo_init,
814 .reset = qdisc_reset_queue,
815 .change = tfifo_init,
816 .dump = tfifo_dump,
817 };
818
819 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
820 {
821 struct netem_sched_data *q = qdisc_priv(sch);
822 int ret;
823
824 if (!opt)
825 return -EINVAL;
826
827 qdisc_watchdog_init(&q->watchdog, sch);
828
829 q->loss_model = CLG_RANDOM;
830 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
831 TC_H_MAKE(sch->handle, 1));
832 if (!q->qdisc) {
833 pr_notice("netem: qdisc create tfifo qdisc failed\n");
834 return -ENOMEM;
835 }
836
837 ret = netem_change(sch, opt);
838 if (ret) {
839 pr_info("netem: change failed\n");
840 qdisc_destroy(q->qdisc);
841 }
842 return ret;
843 }
844
845 static void netem_destroy(struct Qdisc *sch)
846 {
847 struct netem_sched_data *q = qdisc_priv(sch);
848
849 qdisc_watchdog_cancel(&q->watchdog);
850 qdisc_destroy(q->qdisc);
851 dist_free(q->delay_dist);
852 }
853
854 static int dump_loss_model(const struct netem_sched_data *q,
855 struct sk_buff *skb)
856 {
857 struct nlattr *nest;
858
859 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
860 if (nest == NULL)
861 goto nla_put_failure;
862
863 switch (q->loss_model) {
864 case CLG_RANDOM:
865 /* legacy loss model */
866 nla_nest_cancel(skb, nest);
867 return 0; /* no data */
868
869 case CLG_4_STATES: {
870 struct tc_netem_gimodel gi = {
871 .p13 = q->clg.a1,
872 .p31 = q->clg.a2,
873 .p32 = q->clg.a3,
874 .p14 = q->clg.a4,
875 .p23 = q->clg.a5,
876 };
877
878 NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
879 break;
880 }
881 case CLG_GILB_ELL: {
882 struct tc_netem_gemodel ge = {
883 .p = q->clg.a1,
884 .r = q->clg.a2,
885 .h = q->clg.a3,
886 .k1 = q->clg.a4,
887 };
888
889 NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
890 break;
891 }
892 }
893
894 nla_nest_end(skb, nest);
895 return 0;
896
897 nla_put_failure:
898 nla_nest_cancel(skb, nest);
899 return -1;
900 }
901
902 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
903 {
904 const struct netem_sched_data *q = qdisc_priv(sch);
905 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
906 struct tc_netem_qopt qopt;
907 struct tc_netem_corr cor;
908 struct tc_netem_reorder reorder;
909 struct tc_netem_corrupt corrupt;
910 struct tc_netem_rate rate;
911
912 qopt.latency = q->latency;
913 qopt.jitter = q->jitter;
914 qopt.limit = q->limit;
915 qopt.loss = q->loss;
916 qopt.gap = q->gap;
917 qopt.duplicate = q->duplicate;
918 NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
919
920 cor.delay_corr = q->delay_cor.rho;
921 cor.loss_corr = q->loss_cor.rho;
922 cor.dup_corr = q->dup_cor.rho;
923 NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
924
925 reorder.probability = q->reorder;
926 reorder.correlation = q->reorder_cor.rho;
927 NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
928
929 corrupt.probability = q->corrupt;
930 corrupt.correlation = q->corrupt_cor.rho;
931 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
932
933 rate.rate = q->rate;
934 rate.packet_overhead = q->packet_overhead;
935 rate.cell_size = q->cell_size;
936 rate.cell_overhead = q->cell_overhead;
937 NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
938
939 if (dump_loss_model(q, skb) != 0)
940 goto nla_put_failure;
941
942 return nla_nest_end(skb, nla);
943
944 nla_put_failure:
945 nlmsg_trim(skb, nla);
946 return -1;
947 }
948
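/* netem exposes a single fixed class (minor 1) so the inner tfifo
 * qdisc can be inspected or replaced (grafted) from userspace.
 */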
949 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
950 struct sk_buff *skb, struct tcmsg *tcm)
951 {
952 struct netem_sched_data *q = qdisc_priv(sch);
953
954 if (cl != 1) /* only one class */
955 return -ENOENT;
956
957 tcm->tcm_handle |= TC_H_MIN(1);
958 tcm->tcm_info = q->qdisc->handle;
959
960 return 0;
961 }
962
963 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
964 struct Qdisc **old)
965 {
966 struct netem_sched_data *q = qdisc_priv(sch);
967
968 if (new == NULL)
969 new = &noop_qdisc;
970
971 sch_tree_lock(sch);
972 *old = q->qdisc;
973 q->qdisc = new;
974 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
975 qdisc_reset(*old);
976 sch_tree_unlock(sch);
977
978 return 0;
979 }
980
981 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
982 {
983 struct netem_sched_data *q = qdisc_priv(sch);
984 return q->qdisc;
985 }
986
987 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
988 {
989 return 1;
990 }
991
992 static void netem_put(struct Qdisc *sch, unsigned long arg)
993 {
994 }
995
996 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
997 {
998 if (!walker->stop) {
999 if (walker->count >= walker->skip)
1000 if (walker->fn(sch, 1, walker) < 0) {
1001 walker->stop = 1;
1002 return;
1003 }
1004 walker->count++;
1005 }
1006 }
1007
1008 static const struct Qdisc_class_ops netem_class_ops = {
1009 .graft = netem_graft,
1010 .leaf = netem_leaf,
1011 .get = netem_get,
1012 .put = netem_put,
1013 .walk = netem_walk,
1014 .dump = netem_dump_class,
1015 };
1016
1017 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
1018 .id = "netem",
1019 .cl_ops = &netem_class_ops,
1020 .priv_size = sizeof(struct netem_sched_data),
1021 .enqueue = netem_enqueue,
1022 .dequeue = netem_dequeue,
1023 .peek = qdisc_peek_dequeued,
1024 .drop = netem_drop,
1025 .init = netem_init,
1026 .reset = netem_reset,
1027 .destroy = netem_destroy,
1028 .change = netem_change,
1029 .dump = netem_dump,
1030 .owner = THIS_MODULE,
1031 };
1032
1033
1034 static int __init netem_module_init(void)
1035 {
1036 pr_info("netem: version " VERSION "\n");
1037 return register_qdisc(&netem_qdisc_ops);
1038 }
1039 static void __exit netem_module_exit(void)
1040 {
1041 unregister_qdisc(&netem_qdisc_ops);
1042 }
1043 module_init(netem_module_init)
1044 module_exit(netem_module_exit)
1045 MODULE_LICENSE("GPL");