/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.3"

/* Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	----------------------------------------------------------------

	This started out as a simple way to delay outgoing packets to
	test TCP but has grown to include most of the functionality
	of a full blown network emulator like NISTnet. It can delay
	packets and add random jitter (and correlation). The random
	distribution can be loaded from a table as well to provide
	normal, Pareto, or experimental curves. Packet loss,
	duplication, and reordering can also be emulated.

	This qdisc does not do classification; that can be handled by
	layering other disciplines. It does not need to do bandwidth
	control either, since that can be handled by using token
	bucket or other rate control.

	Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
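
/* Typical configurations are set up from user space with the tc tool from
 * iproute2, for example:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *	tc qdisc change dev eth0 root netem loss 0.3% duplicate 1%
 *	tc qdisc change dev eth0 root netem delay 10ms reorder 25% 50%
 *
 * The exact option syntax depends on the installed tc/iproute2 version;
 * see the tc-netem(8) documentation shipped with it.
 */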

struct netem_sched_data {
	struct Qdisc	*qdisc;
	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;
	s32 packet_overhead;
	u32 cell_size;
	u32 cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
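/* Rough sketch of the arithmetic below: with rho treated as a fraction of
 * 2^32, the update computes approximately
 *
 *	answer = ((2^32 - rho) * new_random + rho * last) >> 32
 *
 * so rho == 0 gives an uncorrelated stream, and rho close to 2^32 makes
 * each value track the previous one.
 */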
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 */
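	/* a1..a5 hold the transition probabilities scaled to the u32 range
	 * used by net_random() (0 = never, ~0U = practically always); they
	 * are filled in from the tc_netem_gimodel attribute in
	 * get_loss_clg() below.
	 */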
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
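/* With the tc front end this model is typically configured as
 * "loss gemodel p [ r [ 1-h [ 1-k ]]]"; the four values end up in
 * a1..a4 via get_loss_clg(). The exact syntax and units depend on the
 * installed iproute2, so check its tc-netem documentation.
 */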
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (clg->a3 > net_random())
			return true;
	}

	return false;
}

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (also used for the GI model):
		 * the Markov 4-state loss generator decides whether the
		 * next packet is transmitted or dropped.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm:
		 * the Gilbert-Elliot loss generator decides whether the
		 * next packet is transmitted or dropped.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma. Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
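/* Table entries t are samples of the target distribution scaled by
 * NETEM_DIST_SCALE, so the value returned below works out to roughly
 *
 *	mu + (t * sigma) / NETEM_DIST_SCALE
 *
 * computed as separate quotient and remainder parts to limit overflow.
 */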
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

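/* Convert a packet length into the time needed to transmit it at the
 * configured rate: add the per-packet overhead, optionally round up to
 * whole cells (e.g. ATM) plus per-cell overhead, then divide by the rate.
 */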
static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &q->qdisc->q;

			delay += packet_len_2_sched_time(skb->len, q);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now).
				 * First packet in queue is already in flight,
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

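/* Dequeue hands back the head packet once its time_to_send has been
 * reached; otherwise the qdisc watchdog is armed so dequeue is retried
 * when the packet is due.
 */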
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* is it time to send this packet? */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			sch->q.qlen--;
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
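/* In practice these tables are generated from iproute2's distribution
 * files (e.g. normal, pareto, paretonormal) and selected with something
 * like "tc qdisc ... netem delay 100ms 20ms distribution normal"; the
 * available names depend on the installed iproute2 package.
 */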
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	q->cell_overhead = r->cell_overhead;
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};

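/* netem's TCA_OPTIONS payload starts with a fixed struct tc_netem_qopt,
 * optionally followed by further netlink attributes, so parsing has to
 * skip past the aligned qopt header first.
 */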
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	ret = fifo_set_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_info("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues packets in order of their time_to_send timestamps.
 */
struct fifo_sched_data {
	u32 limit;
	psched_time_t oldest;
};

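/* tfifo_enqueue keeps the internal queue sorted by time_to_send, scanning
 * from the tail since most packets are due last.
 */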
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for add at tail */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			return qdisc_enqueue_tail(nskb, sch);
		}

		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += qdisc_pkt_len(nskb);

		return NET_XMIT_SUCCESS;
	}

	return qdisc_reshape_fail(nskb, sch);
}

static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct fifo_sched_data *q = qdisc_priv(sch);

	if (opt) {
		struct tc_fifo_qopt *ctl = nla_data(opt);
		if (nla_len(opt) < sizeof(*ctl))
			return -EINVAL;

		q->limit = ctl->limit;
	} else
		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);

	q->oldest = PSCHED_PASTPERFECT;
	return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_notice("netem: qdisc create tfifo qdisc failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_info("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");