1 /*
2 * net/sched/sch_netem.c Network emulator
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License.
8 *
9 * Many of the algorithms and ideas for this came from
10 * NIST Net which is not copyrighted.
11 *
12 * Authors: Stephen Hemminger <shemminger@osdl.org>
13 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
14 */
15
16 #include <linux/mm.h>
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/skbuff.h>
23 #include <linux/vmalloc.h>
24 #include <linux/rtnetlink.h>
25
26 #include <net/netlink.h>
27 #include <net/pkt_sched.h>
28
29 #define VERSION "1.3"
30
31 /* Network Emulation Queuing algorithm.
32 ====================================
33
34 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
35 Network Emulation Tool"
36 [2] Luigi Rizzo, DummyNet for FreeBSD
37
38 ----------------------------------------------------------------
39
40 This started out as a simple way to delay outgoing packets to
41 test TCP but has grown to include most of the functionality
42 of a full blown network emulator like NISTnet. It can delay
43 packets and add random jitter (and correlation). The random
44 distribution can be loaded from a table as well to provide
45 normal, Pareto, or experimental curves. Packet loss,
46 duplication, and reordering can also be emulated.
47
48 This qdisc does not do classification; that can be handled by
49 layering other disciplines. It does not need to do bandwidth
50 control either, since that can be handled by using token
51 bucket or other rate control.
52
53 Correlated Loss Generator models
54
55 Added generation of correlated loss according to a 4-state
56 Markov chain (the GI model) and the "Gilbert-Elliot" model.
57
58 References:
59 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
60 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
61 and intuitive loss model for packet networks and its implementation
62 in the Netem module in the Linux kernel", available in [1]
63
64 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
65 Fabio Ludovici <fabio.ludovici at yahoo.it>
66 */
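/*
 * Illustrative userspace usage (a sketch only, assuming the iproute2 "tc"
 * front end of roughly this era; the device name and numbers are made up):
 *
 *   tc qdisc add dev eth0 root netem delay 100ms 10ms 25% \
 *       loss 0.3% 25% duplicate 1% corrupt 0.1% reorder 25% 50%
 *
 * i.e. 100ms mean delay with 10ms jitter and 25% correlation, plus
 * correlated random loss, duplication, corruption and reordering --
 * the knobs implemented by the qdisc below.
 */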
67
68 struct netem_sched_data {
69 struct Qdisc *qdisc;
70 struct qdisc_watchdog watchdog;
71
72 psched_tdiff_t latency;
73 psched_tdiff_t jitter;
74
75 u32 loss;
76 u32 limit;
77 u32 counter;
78 u32 gap;
79 u32 duplicate;
80 u32 reorder;
81 u32 corrupt;
82 u32 rate;
83
84 struct crndstate {
85 u32 last;
86 u32 rho;
87 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
88
89 struct disttable {
90 u32 size;
91 s16 table[0];
92 } *delay_dist;
93
94 enum {
95 CLG_RANDOM,
96 CLG_4_STATES,
97 CLG_GILB_ELL,
98 } loss_model;
99
100 /* Correlated Loss Generation models */
101 struct clgstate {
102 /* state of the Markov chain */
103 u8 state;
104
105 /* 4-states and Gilbert-Elliot models */
106 u32 a1; /* p13 for 4-states or p for GE */
107 u32 a2; /* p31 for 4-states or r for GE */
108 u32 a3; /* p32 for 4-states or h for GE */
109 u32 a4; /* p14 for 4-states or 1-k for GE */
110 u32 a5; /* p23 used only in 4-states */
111 } clg;
112
113 };
114
115 /* Time stamp put into socket buffer control block */
116 struct netem_skb_cb {
117 psched_time_t time_to_send;
118 };
119
120 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
121 {
122 BUILD_BUG_ON(sizeof(skb->cb) <
123 sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
124 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
125 }
126
127 /* init_crandom - initialize correlated random number generator
128 * Use entropy source for initial seed.
129 */
130 static void init_crandom(struct crndstate *state, unsigned long rho)
131 {
132 state->rho = rho;
133 state->last = net_random();
134 }
135
136 /* get_crandom - correlated random number generator
137 * Next number depends on last value.
138 * rho is scaled to avoid floating point.
139 */
140 static u32 get_crandom(struct crndstate *state)
141 {
142 u64 value, rho;
143 unsigned long answer;
144
145 if (state->rho == 0) /* no correlation */
146 return net_random();
147
148 value = net_random();
149 rho = (u64)state->rho + 1;
150 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
151 state->last = answer;
152 return answer;
153 }
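/*
 * For reference, in fixed point the code above computes
 *
 *   answer = (U * (2^32 - rho) + last * rho) >> 32,  rho = state->rho + 1
 *
 * i.e. a weighted blend of a fresh uniform value U and the previous
 * output.  Purely as an illustration, rho ~= 0.25 * 2^32 gives
 * answer ~= 0.75 * U + 0.25 * last: consecutive values are positively
 * correlated while the long-run mean stays that of the uniform source.
 */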
154
155 /* loss_4state - 4-state model loss generator
156 * Generates losses according to the 4-state Markov chain adopted in
157 * the GI (General and Intuitive) loss model.
158 */
159 static bool loss_4state(struct netem_sched_data *q)
160 {
161 struct clgstate *clg = &q->clg;
162 u32 rnd = net_random();
163
164 /*
165 * Compares rnd with the transition probabilities out of the
166 * current state, then decides the next state and whether the
167 * next packet has to be transmitted or lost.
168 * The four states correspond to:
169 * 1 => successfully transmitted packets within a gap period
170 * 4 => isolated losses within a gap period
171 * 3 => lost packets within a burst period
172 * 2 => successfully transmitted packets within a burst period
173 */
174 switch (clg->state) {
175 case 1:
176 if (rnd < clg->a4) {
177 clg->state = 4;
178 return true;
179 } else if (clg->a4 < rnd && rnd < clg->a1) {
180 clg->state = 3;
181 return true;
182 } else if (clg->a1 < rnd)
183 clg->state = 1;
184
185 break;
186 case 2:
187 if (rnd < clg->a5) {
188 clg->state = 3;
189 return true;
190 } else
191 clg->state = 2;
192
193 break;
194 case 3:
195 if (rnd < clg->a3)
196 clg->state = 2;
197 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
198 clg->state = 1;
199 return true;
200 } else if (clg->a2 + clg->a3 < rnd) {
201 clg->state = 3;
202 return true;
203 }
204 break;
205 case 4:
206 clg->state = 1;
207 break;
208 }
209
210 return false;
211 }
212
213 /* loss_gilb_ell - Gilbert-Elliot model loss generator
214 * Generates losses according to the Gilbert-Elliot loss model or
215 * its special cases (Gilbert or Simple Gilbert)
216 *
217 * Makes a comparison between random number and the transition
218 * probabilities outgoing from the current state, then decides the
219 * next state. A second random number is extracted and the comparison
220 * with the loss probability of the current state decides if the next
221 * packet will be transmitted or lost.
222 */
223 static bool loss_gilb_ell(struct netem_sched_data *q)
224 {
225 struct clgstate *clg = &q->clg;
226
227 switch (clg->state) {
228 case 1:
229 if (net_random() < clg->a1)
230 clg->state = 2;
231 if (net_random() < clg->a4)
232 return true;
break;
233 case 2:
234 if (net_random() < clg->a2)
235 clg->state = 1;
236 if (clg->a3 > net_random())
237 return true;
238 }
239
240 return false;
241 }
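/*
 * In Gilbert-Elliot terms (see the clgstate comments above) a1 = p is the
 * good->bad transition probability and a2 = r the bad->good one, so the
 * mean loss-burst (bad-state) length is 1/r packets; a3 (h) and a4 (1-k)
 * set how lossy the bad and good states themselves are.  The "Gilbert"
 * and "Simple Gilbert" special cases correspond to fixing k = 1 (and,
 * for Simple Gilbert, h = 0).
 */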
242
243 static bool loss_event(struct netem_sched_data *q)
244 {
245 switch (q->loss_model) {
246 case CLG_RANDOM:
247 /* Random packet drop 0 => none, ~0 => all */
248 return q->loss && q->loss >= get_crandom(&q->loss_cor);
249
250 case CLG_4_STATES:
251 /* 4-state loss model algorithm (also used for the GI model):
252 * ask the 4-state Markov loss generator whether the next
253 * packet should be dropped.
255 */
256 return loss_4state(q);
257
258 case CLG_GILB_ELL:
259 /* Gilbert-Elliot loss model algorithm:
260 * ask the Gilbert-Elliot loss generator whether the next
261 * packet should be dropped.
263 */
264 return loss_gilb_ell(q);
265 }
266
267 return false; /* not reached */
268 }
269
270
271 /* tabledist - return a pseudo-randomly distributed value with mean mu and
272 * std deviation sigma. Uses table lookup to approximate the desired
273 * distribution, and a uniformly-distributed pseudo-random source.
274 */
275 static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
276 struct crndstate *state,
277 const struct disttable *dist)
278 {
279 psched_tdiff_t x;
280 long t;
281 u32 rnd;
282
283 if (sigma == 0)
284 return mu;
285
286 rnd = get_crandom(state);
287
288 /* default uniform distribution */
289 if (dist == NULL)
290 return (rnd % (2*sigma)) - sigma + mu;
291
292 t = dist->table[rnd % dist->size];
293 x = (sigma % NETEM_DIST_SCALE) * t;
294 if (x >= 0)
295 x += NETEM_DIST_SCALE/2;
296 else
297 x -= NETEM_DIST_SCALE/2;
298
299 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
300 }
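/*
 * The split arithmetic above effectively evaluates
 *
 *   mu + t * sigma / NETEM_DIST_SCALE       (rounded to nearest)
 *
 * while keeping the intermediate products small, where t is a signed
 * table entry pre-scaled by NETEM_DIST_SCALE.  The normal, pareto and
 * paretonormal tables shipped with iproute2 (generated by its maketable
 * helper) use this format.
 */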
301
302 static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
303 {
304 u64 ticks = (u64)len * NSEC_PER_SEC;
305
306 do_div(ticks, rate);
307 return PSCHED_NS2TICKS(ticks);
308 }
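/*
 * The NSEC_PER_SEC scaling above assumes rate is in bytes per second.
 * A purely illustrative example: len = 1500 bytes at rate = 125000 B/s
 * (~1 Mbit/s) gives 1500 * 10^9 / 125000 = 12,000,000 ns, i.e. 12 ms of
 * serialization delay, which is then converted to scheduler ticks.
 */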
309
310 /*
311 * Insert one skb into qdisc.
312 * Note: parent depends on return value to account for queue length.
313 * NET_XMIT_DROP: queue length didn't change.
314 * NET_XMIT_SUCCESS: one skb was queued.
315 */
316 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
317 {
318 struct netem_sched_data *q = qdisc_priv(sch);
319 /* We don't fill cb now as skb_unshare() may invalidate it */
320 struct netem_skb_cb *cb;
321 struct sk_buff *skb2;
322 int ret;
323 int count = 1;
324
325 /* Random duplication */
326 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
327 ++count;
328
329 /* Drop packet? */
330 if (loss_event(q))
331 --count;
332
333 if (count == 0) {
334 sch->qstats.drops++;
335 kfree_skb(skb);
336 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
337 }
338
339 skb_orphan(skb);
340
341 /*
342 * If we need to duplicate the packet, then re-insert it at the top
343 * of the qdisc tree, since the parent queuer expects that only one
344 * skb will be queued.
345 */
346 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
347 struct Qdisc *rootq = qdisc_root(sch);
348 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
349 q->duplicate = 0;
350
351 qdisc_enqueue_root(skb2, rootq);
352 q->duplicate = dupsave;
353 }
354
355 /*
356 * Randomized packet corruption.
357 * Make a copy if needed since we are modifying the data.
358 * If the packet is going to be hardware checksummed, then
359 * do it now in software before we mangle it.
360 */
361 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
362 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
363 (skb->ip_summed == CHECKSUM_PARTIAL &&
364 skb_checksum_help(skb))) {
365 sch->qstats.drops++;
366 return NET_XMIT_DROP;
367 }
368
369 skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
370 }
371
372 cb = netem_skb_cb(skb);
373 if (q->gap == 0 || /* not doing reordering */
374 q->counter < q->gap || /* inside last reordering gap */
375 q->reorder < get_crandom(&q->reorder_cor)) {
376 psched_time_t now;
377 psched_tdiff_t delay;
378
379 delay = tabledist(q->latency, q->jitter,
380 &q->delay_cor, q->delay_dist);
381
382 now = psched_get_time();
383
384 if (q->rate) {
385 struct sk_buff_head *list = &q->qdisc->q;
386
387 delay += packet_len_2_sched_time(skb->len, q->rate);
388
389 if (!skb_queue_empty(list)) {
390 /*
391 * Last packet in queue is reference point (now).
392 * First packet in queue is already in flight,
393 * calculate this time bonus and subtract
394 * from delay.
395 */
396 delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
397 now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
398 }
399 }
400
401 cb->time_to_send = now + delay;
402 ++q->counter;
403 ret = qdisc_enqueue(skb, q->qdisc);
404 } else {
405 /*
406 * Do re-ordering by putting one out of N packets at the front
407 * of the queue.
408 */
409 cb->time_to_send = psched_get_time();
410 q->counter = 0;
411
412 __skb_queue_head(&q->qdisc->q, skb);
413 q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
414 q->qdisc->qstats.requeues++;
415 ret = NET_XMIT_SUCCESS;
416 }
417
418 if (ret != NET_XMIT_SUCCESS) {
419 if (net_xmit_drop_count(ret)) {
420 sch->qstats.drops++;
421 return ret;
422 }
423 }
424
425 sch->q.qlen++;
426 return NET_XMIT_SUCCESS;
427 }
428
429 static unsigned int netem_drop(struct Qdisc *sch)
430 {
431 struct netem_sched_data *q = qdisc_priv(sch);
432 unsigned int len = 0;
433
434 if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
435 sch->q.qlen--;
436 sch->qstats.drops++;
437 }
438 return len;
439 }
440
441 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
442 {
443 struct netem_sched_data *q = qdisc_priv(sch);
444 struct sk_buff *skb;
445
446 if (qdisc_is_throttled(sch))
447 return NULL;
448
449 skb = q->qdisc->ops->peek(q->qdisc);
450 if (skb) {
451 const struct netem_skb_cb *cb = netem_skb_cb(skb);
452 psched_time_t now = psched_get_time();
453
454 /* is it time to send this packet? */
455 if (cb->time_to_send <= now) {
456 skb = qdisc_dequeue_peeked(q->qdisc);
457 if (unlikely(!skb))
458 return NULL;
459
460 #ifdef CONFIG_NET_CLS_ACT
461 /*
462 * If it's at ingress let's pretend the delay is
463 * from the network (tstamp will be updated).
464 */
465 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
466 skb->tstamp.tv64 = 0;
467 #endif
468
469 sch->q.qlen--;
470 qdisc_unthrottled(sch);
471 qdisc_bstats_update(sch, skb);
472 return skb;
473 }
474
475 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
476 }
477
478 return NULL;
479 }
480
481 static void netem_reset(struct Qdisc *sch)
482 {
483 struct netem_sched_data *q = qdisc_priv(sch);
484
485 qdisc_reset(q->qdisc);
486 sch->q.qlen = 0;
487 qdisc_watchdog_cancel(&q->watchdog);
488 }
489
490 static void dist_free(struct disttable *d)
491 {
492 if (d) {
493 if (is_vmalloc_addr(d))
494 vfree(d);
495 else
496 kfree(d);
497 }
498 }
499
500 /*
501 * Distribution data is a variable size payload containing
502 * signed 16 bit values.
503 */
504 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
505 {
506 struct netem_sched_data *q = qdisc_priv(sch);
507 size_t n = nla_len(attr)/sizeof(__s16);
508 const __s16 *data = nla_data(attr);
509 spinlock_t *root_lock;
510 struct disttable *d;
511 int i;
512 size_t s;
513
514 if (n > NETEM_DIST_MAX)
515 return -EINVAL;
516
517 s = sizeof(struct disttable) + n * sizeof(s16);
518 d = kmalloc(s, GFP_KERNEL);
519 if (!d)
520 d = vmalloc(s);
521 if (!d)
522 return -ENOMEM;
523
524 d->size = n;
525 for (i = 0; i < n; i++)
526 d->table[i] = data[i];
527
528 root_lock = qdisc_root_sleeping_lock(sch);
529
530 spin_lock_bh(root_lock);
531 dist_free(q->delay_dist);
532 q->delay_dist = d;
533 spin_unlock_bh(root_lock);
534 return 0;
535 }
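/*
 * A sketch of how this table normally arrives (assuming iproute2; the
 * exact file path is distribution-dependent):
 *
 *   tc qdisc change dev eth0 root netem delay 100ms 20ms distribution normal
 *
 * makes tc read a file such as /usr/lib/tc/normal.dist (or pareto,
 * paretonormal, or a user-generated table) and send the raw s16 samples
 * in TCA_NETEM_DELAY_DIST, which lands here.
 */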
536
537 static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
538 {
539 struct netem_sched_data *q = qdisc_priv(sch);
540 const struct tc_netem_corr *c = nla_data(attr);
541
542 init_crandom(&q->delay_cor, c->delay_corr);
543 init_crandom(&q->loss_cor, c->loss_corr);
544 init_crandom(&q->dup_cor, c->dup_corr);
545 }
546
547 static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
548 {
549 struct netem_sched_data *q = qdisc_priv(sch);
550 const struct tc_netem_reorder *r = nla_data(attr);
551
552 q->reorder = r->probability;
553 init_crandom(&q->reorder_cor, r->correlation);
554 }
555
556 static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
557 {
558 struct netem_sched_data *q = qdisc_priv(sch);
559 const struct tc_netem_corrupt *r = nla_data(attr);
560
561 q->corrupt = r->probability;
562 init_crandom(&q->corrupt_cor, r->correlation);
563 }
564
565 static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
566 {
567 struct netem_sched_data *q = qdisc_priv(sch);
568 const struct tc_netem_rate *r = nla_data(attr);
569
570 q->rate = r->rate;
571 }
572
573 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
574 {
575 struct netem_sched_data *q = qdisc_priv(sch);
576 const struct nlattr *la;
577 int rem;
578
579 nla_for_each_nested(la, attr, rem) {
580 u16 type = nla_type(la);
581
582 switch(type) {
583 case NETEM_LOSS_GI: {
584 const struct tc_netem_gimodel *gi = nla_data(la);
585
586 if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
587 pr_info("netem: incorrect gi model size\n");
588 return -EINVAL;
589 }
590
591 q->loss_model = CLG_4_STATES;
592
593 q->clg.state = 1;
594 q->clg.a1 = gi->p13;
595 q->clg.a2 = gi->p31;
596 q->clg.a3 = gi->p32;
597 q->clg.a4 = gi->p14;
598 q->clg.a5 = gi->p23;
599 break;
600 }
601
602 case NETEM_LOSS_GE: {
603 const struct tc_netem_gemodel *ge = nla_data(la);
604
605 if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
606 pr_info("netem: incorrect gi model size\n");
607 return -EINVAL;
608 }
609
610 q->loss_model = CLG_GILB_ELL;
611 q->clg.state = 1;
612 q->clg.a1 = ge->p;
613 q->clg.a2 = ge->r;
614 q->clg.a3 = ge->h;
615 q->clg.a4 = ge->k1;
616 break;
617 }
618
619 default:
620 pr_info("netem: unknown loss type %u\n", type);
621 return -EINVAL;
622 }
623 }
624
625 return 0;
626 }
627
628 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
629 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
630 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
631 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
632 [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
633 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
634 };
635
636 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
637 const struct nla_policy *policy, int len)
638 {
639 int nested_len = nla_len(nla) - NLA_ALIGN(len);
640
641 if (nested_len < 0) {
642 pr_info("netem: invalid attributes len %d\n", nested_len);
643 return -EINVAL;
644 }
645
646 if (nested_len >= nla_attr_size(0))
647 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
648 nested_len, policy);
649
650 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
651 return 0;
652 }
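/*
 * Note on the offset handling above: netem's TCA_OPTIONS payload starts
 * with a fixed struct tc_netem_qopt and only then carries the nested
 * attributes, so netem_change() passes len = sizeof(struct tc_netem_qopt)
 * and parsing starts NLA_ALIGN(len) bytes into the attribute.
 */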
653
654 /* Parse netlink message to set options */
655 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
656 {
657 struct netem_sched_data *q = qdisc_priv(sch);
658 struct nlattr *tb[TCA_NETEM_MAX + 1];
659 struct tc_netem_qopt *qopt;
660 int ret;
661
662 if (opt == NULL)
663 return -EINVAL;
664
665 qopt = nla_data(opt);
666 ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
667 if (ret < 0)
668 return ret;
669
670 ret = fifo_set_limit(q->qdisc, qopt->limit);
671 if (ret) {
672 pr_info("netem: can't set fifo limit\n");
673 return ret;
674 }
675
676 q->latency = qopt->latency;
677 q->jitter = qopt->jitter;
678 q->limit = qopt->limit;
679 q->gap = qopt->gap;
680 q->counter = 0;
681 q->loss = qopt->loss;
682 q->duplicate = qopt->duplicate;
683
684 /* for compatibility with earlier versions.
685 * if gap is set, need to assume 100% probability
686 */
687 if (q->gap)
688 q->reorder = ~0;
689
690 if (tb[TCA_NETEM_CORR])
691 get_correlation(sch, tb[TCA_NETEM_CORR]);
692
693 if (tb[TCA_NETEM_DELAY_DIST]) {
694 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
695 if (ret)
696 return ret;
697 }
698
699 if (tb[TCA_NETEM_REORDER])
700 get_reorder(sch, tb[TCA_NETEM_REORDER]);
701
702 if (tb[TCA_NETEM_CORRUPT])
703 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
704
705 if (tb[TCA_NETEM_RATE])
706 get_rate(sch, tb[TCA_NETEM_RATE]);
707
708 q->loss_model = CLG_RANDOM;
709 if (tb[TCA_NETEM_LOSS])
710 ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
711
712 return ret;
713 }
714
715 /*
716 * Special case version of FIFO queue for use by netem.
717 * It queues packets in order based on the timestamps in the skbs.
718 */
719 struct fifo_sched_data {
720 u32 limit;
721 psched_time_t oldest;
722 };
723
724 static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
725 {
726 struct fifo_sched_data *q = qdisc_priv(sch);
727 struct sk_buff_head *list = &sch->q;
728 psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
729 struct sk_buff *skb;
730
731 if (likely(skb_queue_len(list) < q->limit)) {
732 /* Optimize for add at tail */
733 if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
734 q->oldest = tnext;
735 return qdisc_enqueue_tail(nskb, sch);
736 }
737
738 skb_queue_reverse_walk(list, skb) {
739 const struct netem_skb_cb *cb = netem_skb_cb(skb);
740
741 if (tnext >= cb->time_to_send)
742 break;
743 }
744
745 __skb_queue_after(list, skb, nskb);
746
747 sch->qstats.backlog += qdisc_pkt_len(nskb);
748
749 return NET_XMIT_SUCCESS;
750 }
751
752 return qdisc_reshape_fail(nskb, sch);
753 }
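/*
 * Packets are kept sorted by cb->time_to_send.  Most arrivals carry a send
 * time no earlier than anything already queued and take the add-at-tail
 * fast path; when jitter produces an earlier timestamp the reverse walk
 * finds the slot.  Illustration: with queued send times 10, 20, 30 a new
 * packet stamped 25 is linked after the 20 entry and before the 30 one.
 */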
754
755 static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
756 {
757 struct fifo_sched_data *q = qdisc_priv(sch);
758
759 if (opt) {
760 struct tc_fifo_qopt *ctl = nla_data(opt);
761 if (nla_len(opt) < sizeof(*ctl))
762 return -EINVAL;
763
764 q->limit = ctl->limit;
765 } else
766 q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
767
768 q->oldest = PSCHED_PASTPERFECT;
769 return 0;
770 }
771
772 static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
773 {
774 struct fifo_sched_data *q = qdisc_priv(sch);
775 struct tc_fifo_qopt opt = { .limit = q->limit };
776
777 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
778 return skb->len;
779
780 nla_put_failure:
781 return -1;
782 }
783
784 static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
785 .id = "tfifo",
786 .priv_size = sizeof(struct fifo_sched_data),
787 .enqueue = tfifo_enqueue,
788 .dequeue = qdisc_dequeue_head,
789 .peek = qdisc_peek_head,
790 .drop = qdisc_queue_drop,
791 .init = tfifo_init,
792 .reset = qdisc_reset_queue,
793 .change = tfifo_init,
794 .dump = tfifo_dump,
795 };
796
797 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
798 {
799 struct netem_sched_data *q = qdisc_priv(sch);
800 int ret;
801
802 if (!opt)
803 return -EINVAL;
804
805 qdisc_watchdog_init(&q->watchdog, sch);
806
807 q->loss_model = CLG_RANDOM;
808 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
809 TC_H_MAKE(sch->handle, 1));
810 if (!q->qdisc) {
811 pr_notice("netem: qdisc create tfifo qdisc failed\n");
812 return -ENOMEM;
813 }
814
815 ret = netem_change(sch, opt);
816 if (ret) {
817 pr_info("netem: change failed\n");
818 qdisc_destroy(q->qdisc);
819 }
820 return ret;
821 }
822
823 static void netem_destroy(struct Qdisc *sch)
824 {
825 struct netem_sched_data *q = qdisc_priv(sch);
826
827 qdisc_watchdog_cancel(&q->watchdog);
828 qdisc_destroy(q->qdisc);
829 dist_free(q->delay_dist);
830 }
831
832 static int dump_loss_model(const struct netem_sched_data *q,
833 struct sk_buff *skb)
834 {
835 struct nlattr *nest;
836
837 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
838 if (nest == NULL)
839 goto nla_put_failure;
840
841 switch (q->loss_model) {
842 case CLG_RANDOM:
843 /* legacy loss model */
844 nla_nest_cancel(skb, nest);
845 return 0; /* no data */
846
847 case CLG_4_STATES: {
848 struct tc_netem_gimodel gi = {
849 .p13 = q->clg.a1,
850 .p31 = q->clg.a2,
851 .p32 = q->clg.a3,
852 .p14 = q->clg.a4,
853 .p23 = q->clg.a5,
854 };
855
856 NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
857 break;
858 }
859 case CLG_GILB_ELL: {
860 struct tc_netem_gemodel ge = {
861 .p = q->clg.a1,
862 .r = q->clg.a2,
863 .h = q->clg.a3,
864 .k1 = q->clg.a4,
865 };
866
867 NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
868 break;
869 }
870 }
871
872 nla_nest_end(skb, nest);
873 return 0;
874
875 nla_put_failure:
876 nla_nest_cancel(skb, nest);
877 return -1;
878 }
879
880 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
881 {
882 const struct netem_sched_data *q = qdisc_priv(sch);
883 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
884 struct tc_netem_qopt qopt;
885 struct tc_netem_corr cor;
886 struct tc_netem_reorder reorder;
887 struct tc_netem_corrupt corrupt;
888 struct tc_netem_rate rate;
889
890 qopt.latency = q->latency;
891 qopt.jitter = q->jitter;
892 qopt.limit = q->limit;
893 qopt.loss = q->loss;
894 qopt.gap = q->gap;
895 qopt.duplicate = q->duplicate;
896 NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
897
898 cor.delay_corr = q->delay_cor.rho;
899 cor.loss_corr = q->loss_cor.rho;
900 cor.dup_corr = q->dup_cor.rho;
901 NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
902
903 reorder.probability = q->reorder;
904 reorder.correlation = q->reorder_cor.rho;
905 NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
906
907 corrupt.probability = q->corrupt;
908 corrupt.correlation = q->corrupt_cor.rho;
909 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
910
911 rate.rate = q->rate;
912 NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
913
914 if (dump_loss_model(q, skb) != 0)
915 goto nla_put_failure;
916
917 return nla_nest_end(skb, nla);
918
919 nla_put_failure:
920 nlmsg_trim(skb, nla);
921 return -1;
922 }
923
924 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
925 struct sk_buff *skb, struct tcmsg *tcm)
926 {
927 struct netem_sched_data *q = qdisc_priv(sch);
928
929 if (cl != 1) /* only one class */
930 return -ENOENT;
931
932 tcm->tcm_handle |= TC_H_MIN(1);
933 tcm->tcm_info = q->qdisc->handle;
934
935 return 0;
936 }
937
938 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
939 struct Qdisc **old)
940 {
941 struct netem_sched_data *q = qdisc_priv(sch);
942
943 if (new == NULL)
944 new = &noop_qdisc;
945
946 sch_tree_lock(sch);
947 *old = q->qdisc;
948 q->qdisc = new;
949 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
950 qdisc_reset(*old);
951 sch_tree_unlock(sch);
952
953 return 0;
954 }
955
956 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
957 {
958 struct netem_sched_data *q = qdisc_priv(sch);
959 return q->qdisc;
960 }
961
962 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
963 {
964 return 1;
965 }
966
967 static void netem_put(struct Qdisc *sch, unsigned long arg)
968 {
969 }
970
971 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
972 {
973 if (!walker->stop) {
974 if (walker->count >= walker->skip)
975 if (walker->fn(sch, 1, walker) < 0) {
976 walker->stop = 1;
977 return;
978 }
979 walker->count++;
980 }
981 }
982
983 static const struct Qdisc_class_ops netem_class_ops = {
984 .graft = netem_graft,
985 .leaf = netem_leaf,
986 .get = netem_get,
987 .put = netem_put,
988 .walk = netem_walk,
989 .dump = netem_dump_class,
990 };
991
992 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
993 .id = "netem",
994 .cl_ops = &netem_class_ops,
995 .priv_size = sizeof(struct netem_sched_data),
996 .enqueue = netem_enqueue,
997 .dequeue = netem_dequeue,
998 .peek = qdisc_peek_dequeued,
999 .drop = netem_drop,
1000 .init = netem_init,
1001 .reset = netem_reset,
1002 .destroy = netem_destroy,
1003 .change = netem_change,
1004 .dump = netem_dump,
1005 .owner = THIS_MODULE,
1006 };
1007
1008
1009 static int __init netem_module_init(void)
1010 {
1011 pr_info("netem: version " VERSION "\n");
1012 return register_qdisc(&netem_qdisc_ops);
1013 }
1014 static void __exit netem_module_exit(void)
1015 {
1016 unregister_qdisc(&netem_qdisc_ops);
1017 }
1018 module_init(netem_module_init)
1019 module_exit(netem_module_exit)
1020 MODULE_LICENSE("GPL");