net: rcu-ify tcf_proto
[deliverable/linux.git] / net / sched / sch_choke.c
1 /*
2 * net/sched/sch_choke.c CHOKE scheduler
3 *
4 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <linux/vmalloc.h>
18 #include <net/pkt_sched.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 #include <net/flow_keys.h>
22
/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows
   but maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If the average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and the chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in the queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004

 */
47
/*
 * Largest queue limit (in packets) accepted by choke_change(); larger
 * requests are rejected with -EINVAL.
 */
#define CHOKE_MAX_QUEUE	((128 * 1024) - 1)
50
51 struct choke_sched_data {
52 /* Parameters */
53 u32 limit;
54 unsigned char flags;
55
56 struct red_parms parms;
57
58 /* Variables */
59 struct red_vars vars;
60 struct tcf_proto __rcu *filter_list;
61 struct {
62 u32 prob_drop; /* Early probability drops */
63 u32 prob_mark; /* Early probability marks */
64 u32 forced_drop; /* Forced drops, qavg > max_thresh */
65 u32 forced_mark; /* Forced marks, qavg > max_thresh */
66 u32 pdrop; /* Drops due to queue limits */
67 u32 other; /* Drops due to drop() calls */
68 u32 matched; /* Drops to flow match */
69 } stats;
70
71 unsigned int head;
72 unsigned int tail;
73
74 unsigned int tab_mask; /* size - 1 */
75
76 struct sk_buff **tab;
77 };
78
79 /* number of elements in queue including holes */
80 static unsigned int choke_len(const struct choke_sched_data *q)
81 {
82 return (q->tail - q->head) & q->tab_mask;
83 }
84
85 /* Is ECN parameter configured */
86 static int use_ecn(const struct choke_sched_data *q)
87 {
88 return q->flags & TC_RED_ECN;
89 }
90
91 /* Should packets over max just be dropped (versus marked) */
92 static int use_harddrop(const struct choke_sched_data *q)
93 {
94 return q->flags & TC_RED_HARDDROP;
95 }
96
97 /* Move head pointer forward to skip over holes */
98 static void choke_zap_head_holes(struct choke_sched_data *q)
99 {
100 do {
101 q->head = (q->head + 1) & q->tab_mask;
102 if (q->head == q->tail)
103 break;
104 } while (q->tab[q->head] == NULL);
105 }
106
107 /* Move tail pointer backwards to reuse holes */
108 static void choke_zap_tail_holes(struct choke_sched_data *q)
109 {
110 do {
111 q->tail = (q->tail - 1) & q->tab_mask;
112 if (q->head == q->tail)
113 break;
114 } while (q->tab[q->tail] == NULL);
115 }
116
117 /* Drop packet from queue array by creating a "hole" */
118 static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
119 {
120 struct choke_sched_data *q = qdisc_priv(sch);
121 struct sk_buff *skb = q->tab[idx];
122
123 q->tab[idx] = NULL;
124
125 if (idx == q->head)
126 choke_zap_head_holes(q);
127 if (idx == q->tail)
128 choke_zap_tail_holes(q);
129
130 sch->qstats.backlog -= qdisc_pkt_len(skb);
131 qdisc_drop(skb, sch);
132 qdisc_tree_decrease_qlen(sch, 1);
133 --sch->q.qlen;
134 }
135
136 struct choke_skb_cb {
137 u16 classid;
138 u8 keys_valid;
139 struct flow_keys keys;
140 };
141
142 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
143 {
144 qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
145 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
146 }
147
148 static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
149 {
150 choke_skb_cb(skb)->classid = classid;
151 }
152
153 static u16 choke_get_classid(const struct sk_buff *skb)
154 {
155 return choke_skb_cb(skb)->classid;
156 }
157
158 /*
159 * Compare flow of two packets
160 * Returns true only if source and destination address and port match.
161 * false for special cases
162 */
163 static bool choke_match_flow(struct sk_buff *skb1,
164 struct sk_buff *skb2)
165 {
166 if (skb1->protocol != skb2->protocol)
167 return false;
168
169 if (!choke_skb_cb(skb1)->keys_valid) {
170 choke_skb_cb(skb1)->keys_valid = 1;
171 skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
172 }
173
174 if (!choke_skb_cb(skb2)->keys_valid) {
175 choke_skb_cb(skb2)->keys_valid = 1;
176 skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
177 }
178
179 return !memcmp(&choke_skb_cb(skb1)->keys,
180 &choke_skb_cb(skb2)->keys,
181 sizeof(struct flow_keys));
182 }
183
184 /*
185 * Classify flow using either:
186 * 1. pre-existing classification result in skb
187 * 2. fast internal classification
188 * 3. use TC filter based classification
189 */
190 static bool choke_classify(struct sk_buff *skb,
191 struct Qdisc *sch, int *qerr)
192
193 {
194 struct choke_sched_data *q = qdisc_priv(sch);
195 struct tcf_result res;
196 struct tcf_proto *fl;
197 int result;
198
199 fl = rcu_dereference_bh(q->filter_list);
200 result = tc_classify(skb, fl, &res);
201 if (result >= 0) {
202 #ifdef CONFIG_NET_CLS_ACT
203 switch (result) {
204 case TC_ACT_STOLEN:
205 case TC_ACT_QUEUED:
206 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
207 case TC_ACT_SHOT:
208 return false;
209 }
210 #endif
211 choke_set_classid(skb, TC_H_MIN(res.classid));
212 return true;
213 }
214
215 return false;
216 }
217
218 /*
219 * Select a packet at random from queue
220 * HACK: since queue can have holes from previous deletion; retry several
221 * times to find a random skb but then just give up and return the head
222 * Will return NULL if queue is empty (q->head == q->tail)
223 */
224 static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
225 unsigned int *pidx)
226 {
227 struct sk_buff *skb;
228 int retrys = 3;
229
230 do {
231 *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
232 skb = q->tab[*pidx];
233 if (skb)
234 return skb;
235 } while (--retrys > 0);
236
237 return q->tab[*pidx = q->head];
238 }
239
240 /*
241 * Compare new packet with random packet in queue
242 * returns true if matched and sets *pidx
243 */
244 static bool choke_match_random(const struct choke_sched_data *q,
245 struct sk_buff *nskb,
246 unsigned int *pidx)
247 {
248 struct sk_buff *oskb;
249
250 if (q->head == q->tail)
251 return false;
252
253 oskb = choke_peek_random(q, pidx);
254 if (rcu_access_pointer(q->filter_list))
255 return choke_get_classid(nskb) == choke_get_classid(oskb);
256
257 return choke_match_flow(oskb, nskb);
258 }
259
260 static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
261 {
262 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
263 struct choke_sched_data *q = qdisc_priv(sch);
264 const struct red_parms *p = &q->parms;
265
266 if (rcu_access_pointer(q->filter_list)) {
267 /* If using external classifiers, get result and record it. */
268 if (!choke_classify(skb, sch, &ret))
269 goto other_drop; /* Packet was eaten by filter */
270 }
271
272 choke_skb_cb(skb)->keys_valid = 0;
273 /* Compute average queue usage (see RED) */
274 q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
275 if (red_is_idling(&q->vars))
276 red_end_of_idle_period(&q->vars);
277
278 /* Is queue small? */
279 if (q->vars.qavg <= p->qth_min)
280 q->vars.qcount = -1;
281 else {
282 unsigned int idx;
283
284 /* Draw a packet at random from queue and compare flow */
285 if (choke_match_random(q, skb, &idx)) {
286 q->stats.matched++;
287 choke_drop_by_idx(sch, idx);
288 goto congestion_drop;
289 }
290
291 /* Queue is large, always mark/drop */
292 if (q->vars.qavg > p->qth_max) {
293 q->vars.qcount = -1;
294
295 sch->qstats.overlimits++;
296 if (use_harddrop(q) || !use_ecn(q) ||
297 !INET_ECN_set_ce(skb)) {
298 q->stats.forced_drop++;
299 goto congestion_drop;
300 }
301
302 q->stats.forced_mark++;
303 } else if (++q->vars.qcount) {
304 if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
305 q->vars.qcount = 0;
306 q->vars.qR = red_random(p);
307
308 sch->qstats.overlimits++;
309 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
310 q->stats.prob_drop++;
311 goto congestion_drop;
312 }
313
314 q->stats.prob_mark++;
315 }
316 } else
317 q->vars.qR = red_random(p);
318 }
319
320 /* Admit new packet */
321 if (sch->q.qlen < q->limit) {
322 q->tab[q->tail] = skb;
323 q->tail = (q->tail + 1) & q->tab_mask;
324 ++sch->q.qlen;
325 sch->qstats.backlog += qdisc_pkt_len(skb);
326 return NET_XMIT_SUCCESS;
327 }
328
329 q->stats.pdrop++;
330 return qdisc_drop(skb, sch);
331
332 congestion_drop:
333 qdisc_drop(skb, sch);
334 return NET_XMIT_CN;
335
336 other_drop:
337 if (ret & __NET_XMIT_BYPASS)
338 sch->qstats.drops++;
339 kfree_skb(skb);
340 return ret;
341 }
342
343 static struct sk_buff *choke_dequeue(struct Qdisc *sch)
344 {
345 struct choke_sched_data *q = qdisc_priv(sch);
346 struct sk_buff *skb;
347
348 if (q->head == q->tail) {
349 if (!red_is_idling(&q->vars))
350 red_start_of_idle_period(&q->vars);
351 return NULL;
352 }
353
354 skb = q->tab[q->head];
355 q->tab[q->head] = NULL;
356 choke_zap_head_holes(q);
357 --sch->q.qlen;
358 sch->qstats.backlog -= qdisc_pkt_len(skb);
359 qdisc_bstats_update(sch, skb);
360
361 return skb;
362 }
363
364 static unsigned int choke_drop(struct Qdisc *sch)
365 {
366 struct choke_sched_data *q = qdisc_priv(sch);
367 unsigned int len;
368
369 len = qdisc_queue_drop(sch);
370 if (len > 0)
371 q->stats.other++;
372 else {
373 if (!red_is_idling(&q->vars))
374 red_start_of_idle_period(&q->vars);
375 }
376
377 return len;
378 }
379
380 static void choke_reset(struct Qdisc *sch)
381 {
382 struct choke_sched_data *q = qdisc_priv(sch);
383
384 red_restart(&q->vars);
385 }
386
387 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
388 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
389 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
390 [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
391 };
392
393
/*
 * Release a packet table.  choke_change() obtains the table from either
 * kcalloc() or vzalloc(), so kvfree() is used to handle both.
 */
static void choke_free(void *addr)
{
	kvfree(addr);
}
398
399 static int choke_change(struct Qdisc *sch, struct nlattr *opt)
400 {
401 struct choke_sched_data *q = qdisc_priv(sch);
402 struct nlattr *tb[TCA_CHOKE_MAX + 1];
403 const struct tc_red_qopt *ctl;
404 int err;
405 struct sk_buff **old = NULL;
406 unsigned int mask;
407 u32 max_P;
408
409 if (opt == NULL)
410 return -EINVAL;
411
412 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
413 if (err < 0)
414 return err;
415
416 if (tb[TCA_CHOKE_PARMS] == NULL ||
417 tb[TCA_CHOKE_STAB] == NULL)
418 return -EINVAL;
419
420 max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
421
422 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
423
424 if (ctl->limit > CHOKE_MAX_QUEUE)
425 return -EINVAL;
426
427 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
428 if (mask != q->tab_mask) {
429 struct sk_buff **ntab;
430
431 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
432 GFP_KERNEL | __GFP_NOWARN);
433 if (!ntab)
434 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
435 if (!ntab)
436 return -ENOMEM;
437
438 sch_tree_lock(sch);
439 old = q->tab;
440 if (old) {
441 unsigned int oqlen = sch->q.qlen, tail = 0;
442
443 while (q->head != q->tail) {
444 struct sk_buff *skb = q->tab[q->head];
445
446 q->head = (q->head + 1) & q->tab_mask;
447 if (!skb)
448 continue;
449 if (tail < mask) {
450 ntab[tail++] = skb;
451 continue;
452 }
453 sch->qstats.backlog -= qdisc_pkt_len(skb);
454 --sch->q.qlen;
455 qdisc_drop(skb, sch);
456 }
457 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
458 q->head = 0;
459 q->tail = tail;
460 }
461
462 q->tab_mask = mask;
463 q->tab = ntab;
464 } else
465 sch_tree_lock(sch);
466
467 q->flags = ctl->flags;
468 q->limit = ctl->limit;
469
470 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
471 ctl->Plog, ctl->Scell_log,
472 nla_data(tb[TCA_CHOKE_STAB]),
473 max_P);
474 red_set_vars(&q->vars);
475
476 if (q->head == q->tail)
477 red_end_of_idle_period(&q->vars);
478
479 sch_tree_unlock(sch);
480 choke_free(old);
481 return 0;
482 }
483
/*
 * Initialise a new CHOKe qdisc.  Initial setup is the same as a
 * configuration change, so this simply delegates to choke_change() and
 * returns its result.
 */
static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
	int err = choke_change(sch, opt);

	return err;
}
488
489 static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
490 {
491 struct choke_sched_data *q = qdisc_priv(sch);
492 struct nlattr *opts = NULL;
493 struct tc_red_qopt opt = {
494 .limit = q->limit,
495 .flags = q->flags,
496 .qth_min = q->parms.qth_min >> q->parms.Wlog,
497 .qth_max = q->parms.qth_max >> q->parms.Wlog,
498 .Wlog = q->parms.Wlog,
499 .Plog = q->parms.Plog,
500 .Scell_log = q->parms.Scell_log,
501 };
502
503 opts = nla_nest_start(skb, TCA_OPTIONS);
504 if (opts == NULL)
505 goto nla_put_failure;
506
507 if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
508 nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
509 goto nla_put_failure;
510 return nla_nest_end(skb, opts);
511
512 nla_put_failure:
513 nla_nest_cancel(skb, opts);
514 return -EMSGSIZE;
515 }
516
517 static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
518 {
519 struct choke_sched_data *q = qdisc_priv(sch);
520 struct tc_choke_xstats st = {
521 .early = q->stats.prob_drop + q->stats.forced_drop,
522 .marked = q->stats.prob_mark + q->stats.forced_mark,
523 .pdrop = q->stats.pdrop,
524 .other = q->stats.other,
525 .matched = q->stats.matched,
526 };
527
528 return gnet_stats_copy_app(d, &st, sizeof(st));
529 }
530
531 static void choke_destroy(struct Qdisc *sch)
532 {
533 struct choke_sched_data *q = qdisc_priv(sch);
534
535 tcf_destroy_chain(&q->filter_list);
536 choke_free(q->tab);
537 }
538
539 static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
540 {
541 return NULL;
542 }
543
544 static unsigned long choke_get(struct Qdisc *sch, u32 classid)
545 {
546 return 0;
547 }
548
/*
 * Class op: intentionally empty.  Installed as both .put and .unbind_tcf
 * in choke_class_ops.
 */
static void choke_put(struct Qdisc *q, unsigned long cl)
{
	/* nothing to release */
}
552
553 static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
554 u32 classid)
555 {
556 return 0;
557 }
558
559 static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
560 unsigned long cl)
561 {
562 struct choke_sched_data *q = qdisc_priv(sch);
563
564 if (cl)
565 return NULL;
566 return &q->filter_list;
567 }
568
569 static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
570 struct sk_buff *skb, struct tcmsg *tcm)
571 {
572 tcm->tcm_handle |= TC_H_MIN(cl);
573 return 0;
574 }
575
576 static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
577 {
578 if (!arg->stop) {
579 if (arg->fn(sch, 1, arg) < 0) {
580 arg->stop = 1;
581 return;
582 }
583 arg->count++;
584 }
585 }
586
587 static const struct Qdisc_class_ops choke_class_ops = {
588 .leaf = choke_leaf,
589 .get = choke_get,
590 .put = choke_put,
591 .tcf_chain = choke_find_tcf,
592 .bind_tcf = choke_bind,
593 .unbind_tcf = choke_put,
594 .dump = choke_dump_class,
595 .walk = choke_walk,
596 };
597
598 static struct sk_buff *choke_peek_head(struct Qdisc *sch)
599 {
600 struct choke_sched_data *q = qdisc_priv(sch);
601
602 return (q->head != q->tail) ? q->tab[q->head] : NULL;
603 }
604
605 static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
606 .id = "choke",
607 .priv_size = sizeof(struct choke_sched_data),
608
609 .enqueue = choke_enqueue,
610 .dequeue = choke_dequeue,
611 .peek = choke_peek_head,
612 .drop = choke_drop,
613 .init = choke_init,
614 .destroy = choke_destroy,
615 .reset = choke_reset,
616 .change = choke_change,
617 .dump = choke_dump,
618 .dump_stats = choke_dump_stats,
619 .owner = THIS_MODULE,
620 };
621
622 static int __init choke_module_init(void)
623 {
624 return register_qdisc(&choke_qdisc_ops);
625 }
626
627 static void __exit choke_module_exit(void)
628 {
629 unregister_qdisc(&choke_qdisc_ops);
630 }
631
632 module_init(choke_module_init)
633 module_exit(choke_module_exit)
634
635 MODULE_LICENSE("GPL");
This page took 0.060489 seconds and 5 git commands to generate.