1 | /* |
2 | * Deadline Scheduling Class (SCHED_DEADLINE) | |
3 | * | |
4 | * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS). | |
5 | * | |
6 | * Tasks that periodically execute their instances for less than their |
7 | * runtime won't miss any of their deadlines. |
8 | * Tasks that are not periodic or sporadic, or that try to execute more |
9 | * than their reserved bandwidth, will be slowed down (and may potentially |
10 | * miss some of their deadlines), and won't affect any other task. |
11 | * | |
12 | * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>, | |
13 | * Juri Lelli <juri.lelli@gmail.com>, |
14 | * Michael Trimarchi <michael@amarulasolutions.com>, |
15 | * Fabio Checconi <fchecconi@gmail.com> | |
16 | */ | |
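/*
 * A -deadline entity's reserved bandwidth is dl_runtime / dl_deadline:
 * e.g., dl_runtime = 10 ms over dl_deadline = 100 ms reserves at most
 * 10% of one CPU, and the CBS logic below confines the entity to that
 * share even when it tries to run for longer.
 */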
17 | #include "sched.h" | |
18 | ||
19 | static inline int dl_time_before(u64 a, u64 b) | |
20 | { | |
21 | return (s64)(a - b) < 0; | |
22 | } | |
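/*
 * Example of the wrap-safe comparison above: with a = 10 and
 * b = ULLONG_MAX (a has just wrapped past b), a - b == 11 as u64, so
 * (s64)(a - b) > 0 and dl_time_before(a, b) returns 0, correctly
 * treating a as the later time even though a < b as unsigned values.
 */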
23 | ||
24 | /* |
25 | * Tells if entity @a should preempt entity @b. | |
26 | */ | |
27 | static inline | |
28 | int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) | |
29 | { | |
30 | return dl_time_before(a->deadline, b->deadline); | |
31 | } | |
32 | ||
33 | static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) |
34 | { | |
35 | return container_of(dl_se, struct task_struct, dl); | |
36 | } | |
37 | ||
38 | static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq) | |
39 | { | |
40 | return container_of(dl_rq, struct rq, dl); | |
41 | } | |
42 | ||
43 | static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se) | |
44 | { | |
45 | struct task_struct *p = dl_task_of(dl_se); | |
46 | struct rq *rq = task_rq(p); | |
47 | ||
48 | return &rq->dl; | |
49 | } | |
50 | ||
51 | static inline int on_dl_rq(struct sched_dl_entity *dl_se) | |
52 | { | |
53 | return !RB_EMPTY_NODE(&dl_se->rb_node); | |
54 | } | |
55 | ||
56 | static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) | |
57 | { | |
58 | struct sched_dl_entity *dl_se = &p->dl; | |
59 | ||
60 | return dl_rq->rb_leftmost == &dl_se->rb_node; | |
61 | } | |
62 | ||
63 | void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq) | |
64 | { | |
65 | dl_rq->rb_root = RB_ROOT; | |
66 | |
67 | #ifdef CONFIG_SMP | |
68 | /* zero means no -deadline tasks */ | |
69 | dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0; | |
70 | ||
71 | dl_rq->dl_nr_migratory = 0; | |
72 | dl_rq->overloaded = 0; | |
73 | dl_rq->pushable_dl_tasks_root = RB_ROOT; | |
74 | #endif | |
75 | } | |
76 | ||
77 | #ifdef CONFIG_SMP | |
78 | ||
79 | static inline int dl_overloaded(struct rq *rq) | |
80 | { | |
81 | return atomic_read(&rq->rd->dlo_count); | |
82 | } | |
83 | ||
84 | static inline void dl_set_overload(struct rq *rq) | |
85 | { | |
86 | if (!rq->online) | |
87 | return; | |
88 | ||
89 | cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask); | |
90 | /* | |
91 | * Must be visible before the overload count is | |
92 | * set (as in sched_rt.c). | |
93 | * | |
94 | * Matched by the barrier in pull_dl_task(). | |
95 | */ | |
96 | smp_wmb(); | |
97 | atomic_inc(&rq->rd->dlo_count); | |
98 | } | |
99 | ||
100 | static inline void dl_clear_overload(struct rq *rq) | |
101 | { | |
102 | if (!rq->online) | |
103 | return; | |
104 | ||
105 | atomic_dec(&rq->rd->dlo_count); | |
106 | cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask); | |
107 | } | |
108 | ||
109 | static void update_dl_migration(struct dl_rq *dl_rq) | |
110 | { | |
111 | if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_total > 1) { | |
112 | if (!dl_rq->overloaded) { | |
113 | dl_set_overload(rq_of_dl_rq(dl_rq)); | |
114 | dl_rq->overloaded = 1; | |
115 | } | |
116 | } else if (dl_rq->overloaded) { | |
117 | dl_clear_overload(rq_of_dl_rq(dl_rq)); | |
118 | dl_rq->overloaded = 0; | |
119 | } | |
120 | } | |
121 | ||
122 | static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
123 | { | |
124 | struct task_struct *p = dl_task_of(dl_se); | |
125 | dl_rq = &rq_of_dl_rq(dl_rq)->dl; | |
126 | ||
127 | dl_rq->dl_nr_total++; | |
128 | if (p->nr_cpus_allowed > 1) | |
129 | dl_rq->dl_nr_migratory++; | |
130 | ||
131 | update_dl_migration(dl_rq); | |
132 | } | |
133 | ||
134 | static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
135 | { | |
136 | struct task_struct *p = dl_task_of(dl_se); | |
137 | dl_rq = &rq_of_dl_rq(dl_rq)->dl; | |
138 | ||
139 | dl_rq->dl_nr_total--; | |
140 | if (p->nr_cpus_allowed > 1) | |
141 | dl_rq->dl_nr_migratory--; | |
142 | ||
143 | update_dl_migration(dl_rq); | |
144 | } | |
145 | ||
146 | /* | |
147 | * The list of pushable -deadline tasks is not a plist, like in |
148 | * sched_rt.c, it is an rb-tree with tasks ordered by deadline. | |
149 | */ | |
150 | static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) | |
151 | { | |
152 | struct dl_rq *dl_rq = &rq->dl; | |
153 | struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node; | |
154 | struct rb_node *parent = NULL; | |
155 | struct task_struct *entry; | |
156 | int leftmost = 1; | |
157 | ||
158 | BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); | |
159 | ||
160 | while (*link) { | |
161 | parent = *link; | |
162 | entry = rb_entry(parent, struct task_struct, | |
163 | pushable_dl_tasks); | |
164 | if (dl_entity_preempt(&p->dl, &entry->dl)) | |
165 | link = &parent->rb_left; | |
166 | else { | |
167 | link = &parent->rb_right; | |
168 | leftmost = 0; | |
169 | } | |
170 | } | |
171 | ||
172 | if (leftmost) | |
173 | dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; | |
174 | ||
175 | rb_link_node(&p->pushable_dl_tasks, parent, link); | |
176 | rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | |
177 | } |
178 | ||
179 | static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) |
180 | { | |
181 | struct dl_rq *dl_rq = &rq->dl; | |
182 | ||
183 | if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) | |
184 | return; | |
185 | ||
186 | if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) { | |
187 | struct rb_node *next_node; | |
188 | ||
189 | next_node = rb_next(&p->pushable_dl_tasks); | |
190 | dl_rq->pushable_dl_tasks_leftmost = next_node; | |
191 | } | |
192 | ||
193 | rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | |
194 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
195 | } | |
196 | ||
197 | static inline int has_pushable_dl_tasks(struct rq *rq) | |
198 | { | |
199 | return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root); | |
200 | } | |
201 | ||
202 | static int push_dl_task(struct rq *rq); | |
203 | ||
204 | #else | |
205 | ||
206 | static inline | |
207 | void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) | |
208 | { | |
209 | } | |
210 | ||
211 | static inline | |
212 | void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) | |
213 | { | |
214 | } | |
215 | ||
216 | static inline | |
217 | void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
218 | { | |
219 | } | |
220 | ||
221 | static inline | |
222 | void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
223 | { | |
224 | } | |
225 | ||
226 | #endif /* CONFIG_SMP */ | |
227 | ||
228 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); |
229 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags); | |
230 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | |
231 | int flags); | |
232 | ||
233 | /* | |
234 | * We are being explicitly informed that a new instance is starting, | |
235 | * and this means that: | |
236 | * - the absolute deadline of the entity has to be placed at | |
237 | * current time + relative deadline; | |
238 | * - the runtime of the entity has to be set to the maximum value. | |
239 | * | |
240 | * The ability to specify such an event is useful whenever a -deadline |
241 | * entity wants to (try to!) synchronize its behaviour with the |
242 | * scheduler's, and to (try to!) reconcile itself with its own scheduling |
243 | * parameters. | |
244 | */ | |
245 | static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) | |
246 | { | |
247 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
248 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
249 | ||
250 | WARN_ON(!dl_se->dl_new || dl_se->dl_throttled); | |
251 | ||
252 | /* | |
253 | * We use the regular wall clock time to set deadlines in the | |
254 | * future; in fact, we must consider execution overheads (time | |
255 | * spent on hardirq context, etc.). | |
256 | */ | |
257 | dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; | |
258 | dl_se->runtime = dl_se->dl_runtime; | |
259 | dl_se->dl_new = 0; | |
260 | } | |
261 | ||
262 | /* | |
263 | * Pure Earliest Deadline First (EDF) scheduling does not deal with the | |
264 | * possibility of an entity lasting more than what it declared, and thus |
265 | * exhausting its runtime. | |
266 | * | |
267 | * Here we are interested in making runtime overrun possible, but we do | |
268 | * not want an entity which is misbehaving to affect the scheduling of all |
269 | * other entities. | |
270 | * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS) | |
271 | * is used, in order to confine each entity within its own bandwidth. | |
272 | * | |
273 | * This function deals exactly with that, and ensures that when the runtime | |
274 | * of an entity is replenished, its deadline is also postponed. That ensures |
275 | * the overrunning entity can't interfere with other entities in the system and |
276 | * can't make them miss their deadlines. Reasons why this kind of overrun |
277 | * could happen are, typically, an entity deliberately trying to exceed its |
278 | * runtime, or one that underestimated it during sched_setscheduler_ex(). |
279 | */ | |
280 | static void replenish_dl_entity(struct sched_dl_entity *dl_se) | |
281 | { | |
282 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
283 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
284 | ||
285 | /* | |
286 | * We keep moving the deadline away until we get some | |
287 | * available runtime for the entity. This ensures correct | |
288 | * handling of situations where the runtime overrun is | |
289 | * arbitrarily large. |
290 | */ | |
291 | while (dl_se->runtime <= 0) { | |
292 | dl_se->deadline += dl_se->dl_deadline; | |
293 | dl_se->runtime += dl_se->dl_runtime; | |
294 | } | |
295 | ||
296 | /* | |
297 | * At this point, the deadline really should be "in | |
298 | * the future" with respect to rq->clock. If it's | |
299 | * not, we are, for some reason, lagging too much! | |
300 | * Anyway, after having warned userspace about that, |
301 | * we still try to keep things running by |
302 | * resetting the deadline and the budget of the | |
303 | * entity. | |
304 | */ | |
305 | if (dl_time_before(dl_se->deadline, rq_clock(rq))) { | |
306 | static bool lag_once = false; | |
307 | ||
308 | if (!lag_once) { | |
309 | lag_once = true; | |
310 | printk_sched("sched: DL replenish lagged too much\n"); |
311 | } | |
312 | dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; | |
313 | dl_se->runtime = dl_se->dl_runtime; | |
314 | } | |
315 | } | |
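/*
 * Worked example for the replenishment loop in replenish_dl_entity():
 * with dl_runtime = 10 ms and dl_deadline = 100 ms, an entity that
 * overran by 25 ms (runtime = -25 ms) needs three replenishments,
 * runtime going -25 -> -15 -> -5 -> +5 ms, while its deadline is
 * pushed 3 * 100 ms further into the future.
 */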
316 | ||
317 | /* | |
318 | * Here we check if --at time t-- an entity (which is probably being | |
319 | * [re]activated or, in general, enqueued) can use its remaining runtime | |
320 | * and its current deadline _without_ exceeding the bandwidth it is | |
321 | * assigned (function returns true if it can't). We are in fact applying | |
322 | * one of the CBS rules: when a task wakes up, if the residual runtime | |
323 | * over residual deadline fits within the allocated bandwidth, then we | |
324 | * can keep the current (absolute) deadline and residual budget without | |
325 | * disrupting the schedulability of the system. Otherwise, we should | |
326 | * refill the runtime and set the deadline a period in the future, | |
327 | * because keeping the current (absolute) deadline of the task would | |
328 | * result in breaking guarantees promised to other tasks. | |
329 | * | |
330 | * This function returns true if: | |
331 | * | |
332 | * runtime / (deadline - t) > dl_runtime / dl_deadline , | |
333 | * | |
334 | * IOW we can't recycle current parameters. | |
335 | */ | |
336 | static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t) | |
337 | { | |
338 | u64 left, right; | |
339 | ||
340 | /* | |
341 | * left and right are the two sides of the equation above, | |
342 | * after a bit of shuffling to use multiplications instead | |
343 | * of divisions. | |
344 | * | |
345 | * Note that none of the time values involved in the two | |
346 | * multiplications are absolute: dl_deadline and dl_runtime | |
347 | * are the relative deadline and the maximum runtime of each | |
348 | * instance, runtime is the runtime left for the last instance | |
349 | * and (deadline - t), since t is rq->clock, is the time left | |
350 | * to the (absolute) deadline. Even if overflowing the u64 type | |
351 | * is very unlikely to occur in both cases, here we scale down | |
352 | * as we want to avoid that risk at all. Scaling down by 10 bits |
353 | * means that we reduce the granularity to about 1us. We are fine with it, |
354 | * since this is only a true/false check and, anyway, thinking |
355 | * of anything below microsecond resolution is actually fiction |
356 | * (but still we want to give the user that illusion >;). | |
357 | */ | |
358 | left = (dl_se->dl_deadline >> 10) * (dl_se->runtime >> 10); | |
359 | right = ((dl_se->deadline - t) >> 10) * (dl_se->dl_runtime >> 10); | |
360 | ||
361 | return dl_time_before(right, left); | |
362 | } | |
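/*
 * Numeric example for dl_entity_overflow(): an entity with
 * dl_runtime = 10 ms over dl_deadline = 100 ms (10% bandwidth) wakes up
 * with runtime = 5 ms left and 20 ms to its deadline. Its residual
 * bandwidth would be 5/20 = 25% > 10%, so the function returns true and
 * update_dl_entity() refills the runtime and moves the deadline instead
 * of reusing the current parameters.
 */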
363 | ||
364 | /* | |
365 | * When a -deadline entity is queued back on the runqueue, its runtime and | |
366 | * deadline might need updating. | |
367 | * | |
368 | * The policy here is that we update the deadline of the entity only if: | |
369 | * - the current deadline is in the past, | |
370 | * - using the remaining runtime with the current deadline would make | |
371 | * the entity exceed its bandwidth. | |
372 | */ | |
373 | static void update_dl_entity(struct sched_dl_entity *dl_se) | |
374 | { | |
375 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
376 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
377 | ||
378 | /* | |
379 | * The arrival of a new instance needs special treatment, i.e., | |
380 | * the actual scheduling parameters have to be "renewed". | |
381 | */ | |
382 | if (dl_se->dl_new) { | |
383 | setup_new_dl_entity(dl_se); | |
384 | return; | |
385 | } | |
386 | ||
387 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) || | |
388 | dl_entity_overflow(dl_se, rq_clock(rq))) { | |
389 | dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; | |
390 | dl_se->runtime = dl_se->dl_runtime; | |
391 | } | |
392 | } | |
393 | ||
394 | /* | |
395 | * If the entity depleted all its runtime, and if we want it to sleep | |
396 | * while waiting for some new execution time to become available, we | |
397 | * set the bandwidth enforcement timer to the replenishment instant | |
398 | * and try to activate it. | |
399 | * | |
400 | * Notice that it is important for the caller to know if the timer | |
401 | * actually started or not (i.e., the replenishment instant is in | |
402 | * the future or in the past). | |
403 | */ | |
404 | static int start_dl_timer(struct sched_dl_entity *dl_se) | |
405 | { | |
406 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
407 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
408 | ktime_t now, act; | |
409 | ktime_t soft, hard; | |
410 | unsigned long range; | |
411 | s64 delta; | |
412 | ||
413 | /* | |
414 | * We want the timer to fire at the deadline, taking into account |
415 | * that the deadline comes from rq->clock and not from the |
416 | * hrtimer's time base reading. |
417 | */ | |
418 | act = ns_to_ktime(dl_se->deadline); | |
419 | now = hrtimer_cb_get_time(&dl_se->dl_timer); | |
420 | delta = ktime_to_ns(now) - rq_clock(rq); | |
421 | act = ktime_add_ns(act, delta); | |
422 | ||
423 | /* | |
424 | * If the expiry time already passed, e.g., because the value | |
425 | * chosen as the deadline is too small, don't even try to | |
426 | * start the timer in the past! | |
427 | */ | |
428 | if (ktime_us_delta(act, now) < 0) | |
429 | return 0; | |
430 | ||
431 | hrtimer_set_expires(&dl_se->dl_timer, act); | |
432 | ||
433 | soft = hrtimer_get_softexpires(&dl_se->dl_timer); | |
434 | hard = hrtimer_get_expires(&dl_se->dl_timer); | |
435 | range = ktime_to_ns(ktime_sub(hard, soft)); | |
436 | __hrtimer_start_range_ns(&dl_se->dl_timer, soft, | |
437 | range, HRTIMER_MODE_ABS, 0); | |
438 | ||
439 | return hrtimer_active(&dl_se->dl_timer); | |
440 | } | |
441 | ||
442 | /* | |
443 | * This is the bandwidth enforcement timer callback. If here, we know | |
444 | * a task is not on its dl_rq, since the fact that the timer was running | |
445 | * means the task is throttled and needs a runtime replenishment. | |
446 | * | |
447 | * However, what we actually do depends on whether the task is still active |
448 | * (i.e., it is on its rq) or has been removed from there by a call to |
449 | * dequeue_task_dl(). In the former case we must issue the runtime | |
450 | * replenishment and add the task back to the dl_rq; in the latter, we just | |
451 | * do nothing but clear dl_throttled, so that runtime and deadline |
452 | * updating (and the queueing back to dl_rq) will be done by the | |
453 | * next call to enqueue_task_dl(). | |
454 | */ | |
455 | static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |
456 | { | |
457 | struct sched_dl_entity *dl_se = container_of(timer, | |
458 | struct sched_dl_entity, | |
459 | dl_timer); | |
460 | struct task_struct *p = dl_task_of(dl_se); | |
461 | struct rq *rq = task_rq(p); | |
462 | raw_spin_lock(&rq->lock); | |
463 | ||
464 | /* | |
465 | * We need to take care of possible races here. In fact, the |
466 | * task might have changed its scheduling policy to something | |
467 | * different from SCHED_DEADLINE or changed its reservation | |
468 | * parameters (through sched_setscheduler()). | |
469 | */ | |
470 | if (!dl_task(p) || dl_se->dl_new) | |
471 | goto unlock; | |
472 | ||
473 | sched_clock_tick(); | |
474 | update_rq_clock(rq); | |
475 | dl_se->dl_throttled = 0; | |
476 | if (p->on_rq) { | |
477 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | |
478 | if (task_has_dl_policy(rq->curr)) | |
479 | check_preempt_curr_dl(rq, p, 0); | |
480 | else | |
481 | resched_task(rq->curr); | |
482 | #ifdef CONFIG_SMP |
483 | /* | |
484 | * Queueing this task back might have overloaded rq, | |
485 | * check if we need to kick someone away. | |
486 | */ | |
487 | if (has_pushable_dl_tasks(rq)) | |
488 | push_dl_task(rq); | |
489 | #endif | |
490 | } |
491 | unlock: | |
492 | raw_spin_unlock(&rq->lock); | |
493 | ||
494 | return HRTIMER_NORESTART; | |
495 | } | |
496 | ||
497 | void init_dl_task_timer(struct sched_dl_entity *dl_se) | |
498 | { | |
499 | struct hrtimer *timer = &dl_se->dl_timer; | |
500 | ||
501 | if (hrtimer_active(timer)) { | |
502 | hrtimer_try_to_cancel(timer); | |
503 | return; | |
504 | } | |
505 | ||
506 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
507 | timer->function = dl_task_timer; | |
508 | } | |
509 | ||
510 | static | |
511 | int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) | |
512 | { | |
513 | int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); | |
514 | int rorun = dl_se->runtime <= 0; | |
515 | ||
516 | if (!rorun && !dmiss) | |
517 | return 0; | |
518 | ||
519 | /* | |
520 | * If we are beyond our current deadline and we are still | |
521 | * executing, then we have already used some of the runtime of | |
522 | * the next instance. Thus, if we do not account for that, we are |
523 | * stealing bandwidth from the system at each deadline miss! | |
524 | */ | |
525 | if (dmiss) { | |
526 | dl_se->runtime = rorun ? dl_se->runtime : 0; | |
527 | dl_se->runtime -= rq_clock(rq) - dl_se->deadline; | |
528 | } | |
529 | ||
530 | return 1; | |
531 | } | |
532 | ||
533 | /* | |
534 | * Update the current task's runtime statistics (provided it is still | |
535 | * a -deadline task and has not been removed from the dl_rq). | |
536 | */ | |
537 | static void update_curr_dl(struct rq *rq) | |
538 | { | |
539 | struct task_struct *curr = rq->curr; | |
540 | struct sched_dl_entity *dl_se = &curr->dl; | |
541 | u64 delta_exec; | |
542 | ||
543 | if (!dl_task(curr) || !on_dl_rq(dl_se)) | |
544 | return; | |
545 | ||
546 | /* | |
547 | * Consumed budget is computed considering the time as | |
548 | * observed by schedulable tasks (excluding time spent | |
549 | * in hardirq context, etc.). Deadlines are instead | |
550 | * computed using hard walltime. This seems to be the more | |
551 | * natural solution, but the full ramifications of this | |
552 | * approach need further study. | |
553 | */ | |
554 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; | |
555 | if (unlikely((s64)delta_exec < 0)) | |
556 | delta_exec = 0; | |
557 | ||
558 | schedstat_set(curr->se.statistics.exec_max, | |
559 | max(curr->se.statistics.exec_max, delta_exec)); | |
560 | ||
561 | curr->se.sum_exec_runtime += delta_exec; | |
562 | account_group_exec_runtime(curr, delta_exec); | |
563 | ||
564 | curr->se.exec_start = rq_clock_task(rq); | |
565 | cpuacct_charge(curr, delta_exec); | |
566 | ||
567 | sched_rt_avg_update(rq, delta_exec); |
568 | ||
569 | dl_se->runtime -= delta_exec; |
570 | if (dl_runtime_exceeded(rq, dl_se)) { | |
571 | __dequeue_task_dl(rq, curr, 0); | |
572 | if (likely(start_dl_timer(dl_se))) | |
573 | dl_se->dl_throttled = 1; | |
574 | else | |
575 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); | |
576 | ||
577 | if (!is_leftmost(curr, &rq->dl)) | |
578 | resched_task(curr); | |
579 | } | |
580 | } | |
581 | ||
582 | #ifdef CONFIG_SMP |
583 | ||
584 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu); | |
585 | ||
586 | static inline u64 next_deadline(struct rq *rq) | |
587 | { | |
588 | struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu); | |
589 | ||
590 | if (next && dl_prio(next->prio)) | |
591 | return next->dl.deadline; | |
592 | else | |
593 | return 0; | |
594 | } | |
595 | ||
596 | static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | |
597 | { | |
598 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
599 | ||
600 | if (dl_rq->earliest_dl.curr == 0 || | |
601 | dl_time_before(deadline, dl_rq->earliest_dl.curr)) { | |
602 | /* | |
603 | * If the dl_rq had no -deadline tasks, or if the new task | |
604 | * has a shorter deadline than the current one on dl_rq, we |
605 | * know that the previous earliest becomes our next earliest, | |
606 | * as the new task becomes the earliest itself. | |
607 | */ | |
608 | dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr; | |
609 | dl_rq->earliest_dl.curr = deadline; | |
610 | } else if (dl_rq->earliest_dl.next == 0 || | |
611 | dl_time_before(deadline, dl_rq->earliest_dl.next)) { | |
612 | /* | |
613 | * On the other hand, if the new -deadline task has |
614 | * a later deadline than the earliest one on dl_rq, but |
615 | * it is earlier than the next (if any), we must | |
616 | * recompute the next-earliest. | |
617 | */ | |
618 | dl_rq->earliest_dl.next = next_deadline(rq); | |
619 | } | |
620 | } | |
621 | ||
622 | static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | |
623 | { | |
624 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
625 | ||
626 | /* | |
627 | * Since we may have removed our earliest (and/or next earliest) | |
628 | * task, we must recompute them. |
629 | */ | |
630 | if (!dl_rq->dl_nr_running) { | |
631 | dl_rq->earliest_dl.curr = 0; | |
632 | dl_rq->earliest_dl.next = 0; | |
633 | } else { | |
634 | struct rb_node *leftmost = dl_rq->rb_leftmost; | |
635 | struct sched_dl_entity *entry; | |
636 | ||
637 | entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); | |
638 | dl_rq->earliest_dl.curr = entry->deadline; | |
639 | dl_rq->earliest_dl.next = next_deadline(rq); | |
640 | } | |
641 | } | |
642 | ||
643 | #else | |
644 | ||
645 | static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | |
646 | static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | |
647 | ||
648 | #endif /* CONFIG_SMP */ | |
649 | ||
650 | static inline | |
651 | void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
652 | { | |
653 | int prio = dl_task_of(dl_se)->prio; | |
654 | u64 deadline = dl_se->deadline; | |
655 | ||
656 | WARN_ON(!dl_prio(prio)); | |
657 | dl_rq->dl_nr_running++; | |
658 | ||
659 | inc_dl_deadline(dl_rq, deadline); | |
660 | inc_dl_migration(dl_se, dl_rq); | |
661 | } | |
662 | ||
663 | static inline | |
664 | void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
665 | { | |
666 | int prio = dl_task_of(dl_se)->prio; | |
667 | ||
668 | WARN_ON(!dl_prio(prio)); | |
669 | WARN_ON(!dl_rq->dl_nr_running); | |
670 | dl_rq->dl_nr_running--; | |
671 | ||
672 | dec_dl_deadline(dl_rq, dl_se->deadline); | |
673 | dec_dl_migration(dl_se, dl_rq); | |
674 | } | |
675 | ||
676 | static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) |
677 | { | |
678 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
679 | struct rb_node **link = &dl_rq->rb_root.rb_node; | |
680 | struct rb_node *parent = NULL; | |
681 | struct sched_dl_entity *entry; | |
682 | int leftmost = 1; | |
683 | ||
684 | BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); | |
685 | ||
686 | while (*link) { | |
687 | parent = *link; | |
688 | entry = rb_entry(parent, struct sched_dl_entity, rb_node); | |
689 | if (dl_time_before(dl_se->deadline, entry->deadline)) | |
690 | link = &parent->rb_left; | |
691 | else { | |
692 | link = &parent->rb_right; | |
693 | leftmost = 0; | |
694 | } | |
695 | } | |
696 | ||
697 | if (leftmost) | |
698 | dl_rq->rb_leftmost = &dl_se->rb_node; | |
699 | ||
700 | rb_link_node(&dl_se->rb_node, parent, link); | |
701 | rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root); | |
702 | ||
703 | inc_dl_tasks(dl_se, dl_rq); |
704 | } |
705 | ||
706 | static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) | |
707 | { | |
708 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
709 | ||
710 | if (RB_EMPTY_NODE(&dl_se->rb_node)) | |
711 | return; | |
712 | ||
713 | if (dl_rq->rb_leftmost == &dl_se->rb_node) { | |
714 | struct rb_node *next_node; | |
715 | ||
716 | next_node = rb_next(&dl_se->rb_node); | |
717 | dl_rq->rb_leftmost = next_node; | |
718 | } | |
719 | ||
720 | rb_erase(&dl_se->rb_node, &dl_rq->rb_root); | |
721 | RB_CLEAR_NODE(&dl_se->rb_node); | |
722 | ||
723 | dec_dl_tasks(dl_se, dl_rq); |
724 | } |
725 | ||
726 | static void | |
727 | enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) | |
728 | { | |
729 | BUG_ON(on_dl_rq(dl_se)); | |
730 | ||
731 | /* | |
732 | * If this is a wakeup or a new instance, the scheduling | |
733 | * parameters of the task might need updating. Otherwise, | |
734 | * we want a replenishment of its runtime. | |
735 | */ | |
736 | if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) | |
737 | replenish_dl_entity(dl_se); | |
738 | else | |
739 | update_dl_entity(dl_se); | |
740 | ||
741 | __enqueue_dl_entity(dl_se); | |
742 | } | |
743 | ||
744 | static void dequeue_dl_entity(struct sched_dl_entity *dl_se) | |
745 | { | |
746 | __dequeue_dl_entity(dl_se); | |
747 | } | |
748 | ||
749 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |
750 | { | |
751 | /* | |
752 | * If p is throttled, we do nothing. In fact, if it exhausted | |
753 | * its budget it needs a replenishment and, since it now is on | |
754 | * its rq, the bandwidth timer callback (which clearly has not | |
755 | * run yet) will take care of this. | |
756 | */ | |
757 | if (p->dl.dl_throttled) | |
758 | return; | |
759 | ||
760 | enqueue_dl_entity(&p->dl, flags); | |
761 | |
762 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | |
763 | enqueue_pushable_dl_task(rq, p); | |
764 | ||
765 | inc_nr_running(rq); |
766 | } | |
767 | ||
768 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |
769 | { | |
770 | dequeue_dl_entity(&p->dl); | |
771 | dequeue_pushable_dl_task(rq, p); |
772 | } |
773 | ||
774 | static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |
775 | { | |
776 | update_curr_dl(rq); | |
777 | __dequeue_task_dl(rq, p, flags); | |
778 | ||
779 | dec_nr_running(rq); | |
780 | } | |
781 | ||
782 | /* | |
783 | * Yield task semantic for -deadline tasks is: | |
784 | * | |
785 | * get off the CPU until our next instance, with |
786 | * a new runtime. This is of little use now, since we | |
787 | * don't have a bandwidth reclaiming mechanism. Anyway, | |
788 | * bandwidth reclaiming is planned for the future, and | |
789 | * yield_task_dl will indicate that some spare budget | |
790 | * is available for other task instances to use. |
791 | */ | |
792 | static void yield_task_dl(struct rq *rq) | |
793 | { | |
794 | struct task_struct *p = rq->curr; | |
795 | ||
796 | /* | |
797 | * We make the task go to sleep until its current deadline by | |
798 | * forcing its runtime to zero. This way, update_curr_dl() stops | |
799 | * it and the bandwidth timer will wake it up and will give it | |
800 | * new scheduling parameters (thanks to dl_new=1). | |
801 | */ | |
802 | if (p->dl.runtime > 0) { | |
803 | rq->curr->dl.dl_new = 1; | |
804 | p->dl.runtime = 0; | |
805 | } | |
806 | update_curr_dl(rq); | |
807 | } | |
808 | ||
809 | #ifdef CONFIG_SMP |
810 | ||
811 | static int find_later_rq(struct task_struct *task); | |
812 | static int latest_cpu_find(struct cpumask *span, | |
813 | struct task_struct *task, | |
814 | struct cpumask *later_mask); | |
815 | ||
816 | static int | |
817 | select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | |
818 | { | |
819 | struct task_struct *curr; | |
820 | struct rq *rq; | |
821 | ||
822 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) | |
823 | goto out; | |
824 | ||
825 | rq = cpu_rq(cpu); | |
826 | ||
827 | rcu_read_lock(); | |
828 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | |
829 | ||
830 | /* | |
831 | * If we are dealing with a -deadline task, we must | |
832 | * decide where to wake it up. | |
833 | * If it has a later deadline and the current task | |
834 | * on this rq can't move (provided the waking task | |
835 | * can!) we prefer to send it somewhere else. On the | |
836 | * other hand, if it has a shorter deadline, we | |
837 | * try to make it stay here, it might be important. | |
838 | */ | |
839 | if (unlikely(dl_task(curr)) && | |
840 | (curr->nr_cpus_allowed < 2 || | |
841 | !dl_entity_preempt(&p->dl, &curr->dl)) && | |
842 | (p->nr_cpus_allowed > 1)) { | |
843 | int target = find_later_rq(p); | |
844 | ||
845 | if (target != -1) | |
846 | cpu = target; | |
847 | } | |
848 | rcu_read_unlock(); | |
849 | ||
850 | out: | |
851 | return cpu; | |
852 | } | |
853 | ||
854 | static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | |
855 | { | |
856 | /* | |
857 | * Current can't be migrated, useless to reschedule, | |
858 | * let's hope p can move out. | |
859 | */ | |
860 | if (rq->curr->nr_cpus_allowed == 1 || | |
861 | latest_cpu_find(rq->rd->span, rq->curr, NULL) == -1) | |
862 | return; | |
863 | ||
864 | /* | |
865 | * p is migratable, so let's not schedule it and | |
866 | * see if it is pushed or pulled somewhere else. | |
867 | */ | |
868 | if (p->nr_cpus_allowed != 1 && | |
869 | latest_cpu_find(rq->rd->span, p, NULL) != -1) | |
870 | return; | |
871 | ||
872 | resched_task(rq->curr); | |
873 | } | |
874 | ||
875 | #endif /* CONFIG_SMP */ | |
876 | ||
877 | /* |
878 | * Only called when both the current and waking task are -deadline | |
879 | * tasks. | |
880 | */ | |
881 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | |
882 | int flags) | |
883 | { | |
884 | if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { |
885 | resched_task(rq->curr); |
886 | return; |
887 | } | |
888 | ||
889 | #ifdef CONFIG_SMP | |
890 | /* | |
891 | * In the unlikely case current and p have the same deadline | |
892 | * let us try to decide what's the best thing to do... | |
893 | */ | |
894 | if ((s64)(p->dl.deadline - rq->curr->dl.deadline) == 0 && | |
895 | !need_resched()) | |
896 | check_preempt_equal_dl(rq, p); | |
897 | #endif /* CONFIG_SMP */ | |
898 | } |
899 | ||
900 | #ifdef CONFIG_SCHED_HRTICK | |
901 | static void start_hrtick_dl(struct rq *rq, struct task_struct *p) | |
902 | { | |
903 | s64 delta = p->dl.dl_runtime - p->dl.runtime; | |
904 | ||
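/*
 * delta is the budget already consumed in this instance, in
 * nanoseconds; the hrtick is only programmed once more than
 * 10us (10000 ns) of the budget has been used up.
 */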
905 | if (delta > 10000) | |
906 | hrtick_start(rq, p->dl.runtime); | |
907 | } | |
908 | #endif | |
909 | ||
910 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, | |
911 | struct dl_rq *dl_rq) | |
912 | { | |
913 | struct rb_node *left = dl_rq->rb_leftmost; | |
914 | ||
915 | if (!left) | |
916 | return NULL; | |
917 | ||
918 | return rb_entry(left, struct sched_dl_entity, rb_node); | |
919 | } | |
920 | ||
921 | struct task_struct *pick_next_task_dl(struct rq *rq) | |
922 | { | |
923 | struct sched_dl_entity *dl_se; | |
924 | struct task_struct *p; | |
925 | struct dl_rq *dl_rq; | |
926 | ||
927 | dl_rq = &rq->dl; | |
928 | ||
929 | if (unlikely(!dl_rq->dl_nr_running)) | |
930 | return NULL; | |
931 | ||
932 | dl_se = pick_next_dl_entity(rq, dl_rq); | |
933 | BUG_ON(!dl_se); | |
934 | ||
935 | p = dl_task_of(dl_se); | |
936 | p->se.exec_start = rq_clock_task(rq); | |
937 | |
938 | /* Running task will never be pushed. */ | |
939 | if (p) | |
940 | dequeue_pushable_dl_task(rq, p); | |
941 | ||
942 | #ifdef CONFIG_SCHED_HRTICK |
943 | if (hrtick_enabled(rq)) | |
944 | start_hrtick_dl(rq, p); | |
945 | #endif | |
946 | |
947 | #ifdef CONFIG_SMP | |
948 | rq->post_schedule = has_pushable_dl_tasks(rq); | |
949 | #endif /* CONFIG_SMP */ | |
950 | ||
951 | return p; |
952 | } | |
953 | ||
954 | static void put_prev_task_dl(struct rq *rq, struct task_struct *p) | |
955 | { | |
956 | update_curr_dl(rq); | |
957 | |
958 | if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) | |
959 | enqueue_pushable_dl_task(rq, p); | |
960 | } |
961 | ||
962 | static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) | |
963 | { | |
964 | update_curr_dl(rq); | |
965 | ||
966 | #ifdef CONFIG_SCHED_HRTICK | |
967 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) | |
968 | start_hrtick_dl(rq, p); | |
969 | #endif | |
970 | } | |
971 | ||
972 | static void task_fork_dl(struct task_struct *p) | |
973 | { | |
974 | /* | |
975 | * SCHED_DEADLINE tasks cannot fork and this is achieved through | |
976 | * sched_fork() | |
977 | */ | |
978 | } | |
979 | ||
980 | static void task_dead_dl(struct task_struct *p) | |
981 | { | |
982 | struct hrtimer *timer = &p->dl.dl_timer; | |
983 | ||
984 | if (hrtimer_active(timer)) | |
985 | hrtimer_try_to_cancel(timer); | |
986 | } | |
987 | ||
988 | static void set_curr_task_dl(struct rq *rq) | |
989 | { | |
990 | struct task_struct *p = rq->curr; | |
991 | ||
992 | p->se.exec_start = rq_clock_task(rq); | |
993 | |
994 | /* You can't push away the running task */ | |
995 | dequeue_pushable_dl_task(rq, p); | |
996 | } | |
997 | ||
998 | #ifdef CONFIG_SMP | |
999 | ||
1000 | /* Only try algorithms three times */ | |
1001 | #define DL_MAX_TRIES 3 | |
1002 | ||
1003 | static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) | |
1004 | { | |
1005 | if (!task_running(rq, p) && | |
1006 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && | |
1007 | (p->nr_cpus_allowed > 1)) | |
1008 | return 1; | |
1009 | ||
1010 | return 0; | |
1011 | } | |
1012 | ||
1013 | /* Returns the second earliest -deadline task, NULL otherwise */ | |
1014 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu) | |
1015 | { | |
1016 | struct rb_node *next_node = rq->dl.rb_leftmost; | |
1017 | struct sched_dl_entity *dl_se; | |
1018 | struct task_struct *p = NULL; | |
1019 | ||
1020 | next_node: | |
1021 | next_node = rb_next(next_node); | |
1022 | if (next_node) { | |
1023 | dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node); | |
1024 | p = dl_task_of(dl_se); | |
1025 | ||
1026 | if (pick_dl_task(rq, p, cpu)) | |
1027 | return p; | |
1028 | ||
1029 | goto next_node; | |
1030 | } | |
1031 | ||
1032 | return NULL; | |
1033 | } | |
1034 | ||
1035 | static int latest_cpu_find(struct cpumask *span, | |
1036 | struct task_struct *task, | |
1037 | struct cpumask *later_mask) | |
1038 | { | |
1039 | const struct sched_dl_entity *dl_se = &task->dl; | |
1040 | int cpu, found = -1, best = 0; | |
1041 | u64 max_dl = 0; | |
1042 | ||
1043 | for_each_cpu(cpu, span) { | |
1044 | struct rq *rq = cpu_rq(cpu); | |
1045 | struct dl_rq *dl_rq = &rq->dl; | |
1046 | ||
1047 | if (cpumask_test_cpu(cpu, &task->cpus_allowed) && | |
1048 | (!dl_rq->dl_nr_running || dl_time_before(dl_se->deadline, | |
1049 | dl_rq->earliest_dl.curr))) { | |
1050 | if (later_mask) | |
1051 | cpumask_set_cpu(cpu, later_mask); | |
1052 | if (!best && !dl_rq->dl_nr_running) { | |
1053 | best = 1; | |
1054 | found = cpu; | |
1055 | } else if (!best && | |
1056 | dl_time_before(max_dl, | |
1057 | dl_rq->earliest_dl.curr)) { | |
1058 | max_dl = dl_rq->earliest_dl.curr; | |
1059 | found = cpu; | |
1060 | } | |
1061 | } else if (later_mask) | |
1062 | cpumask_clear_cpu(cpu, later_mask); | |
1063 | } | |
1064 | ||
1065 | return found; | |
1066 | } | |
1067 | ||
1068 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); | |
1069 | ||
1070 | static int find_later_rq(struct task_struct *task) | |
1071 | { | |
1072 | struct sched_domain *sd; | |
1073 | struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); | |
1074 | int this_cpu = smp_processor_id(); | |
1075 | int best_cpu, cpu = task_cpu(task); | |
1076 | ||
1077 | /* Make sure the mask is initialized first */ | |
1078 | if (unlikely(!later_mask)) | |
1079 | return -1; | |
1080 | ||
1081 | if (task->nr_cpus_allowed == 1) | |
1082 | return -1; | |
1083 | ||
1084 | best_cpu = latest_cpu_find(task_rq(task)->rd->span, task, later_mask); | |
1085 | if (best_cpu == -1) | |
1086 | return -1; | |
1087 | ||
1088 | /* | |
1089 | * If we are here, some target has been found, | |
1090 | * the most suitable of which is cached in best_cpu. | |
1091 | * This is, among the runqueues whose current tasks |
1092 | * have later deadlines than this task's, the rq |
1093 | * with the latest such deadline. |
1094 | * | |
1095 | * Now we check how well this matches with task's | |
1096 | * affinity and system topology. | |
1097 | * | |
1098 | * The last cpu where the task ran is our first |
1099 | * guess, since it is most likely cache-hot there. | |
1100 | */ | |
1101 | if (cpumask_test_cpu(cpu, later_mask)) | |
1102 | return cpu; | |
1103 | /* | |
1104 | * Check if this_cpu is to be skipped (i.e., it is | |
1105 | * not in the mask) or not. | |
1106 | */ | |
1107 | if (!cpumask_test_cpu(this_cpu, later_mask)) | |
1108 | this_cpu = -1; | |
1109 | ||
1110 | rcu_read_lock(); | |
1111 | for_each_domain(cpu, sd) { | |
1112 | if (sd->flags & SD_WAKE_AFFINE) { | |
1113 | ||
1114 | /* | |
1115 | * If possible, preempting this_cpu is | |
1116 | * cheaper than migrating. | |
1117 | */ | |
1118 | if (this_cpu != -1 && | |
1119 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { | |
1120 | rcu_read_unlock(); | |
1121 | return this_cpu; | |
1122 | } | |
1123 | ||
1124 | /* | |
1125 | * Last chance: if best_cpu is valid and is | |
1126 | * in the mask, that becomes our choice. | |
1127 | */ | |
1128 | if (best_cpu < nr_cpu_ids && | |
1129 | cpumask_test_cpu(best_cpu, sched_domain_span(sd))) { | |
1130 | rcu_read_unlock(); | |
1131 | return best_cpu; | |
1132 | } | |
1133 | } | |
1134 | } | |
1135 | rcu_read_unlock(); | |
1136 | ||
1137 | /* | |
1138 | * At this point, all our guesses failed, we just return | |
1139 | * 'something', and let the caller sort things out. |
1140 | */ | |
1141 | if (this_cpu != -1) | |
1142 | return this_cpu; | |
1143 | ||
1144 | cpu = cpumask_any(later_mask); | |
1145 | if (cpu < nr_cpu_ids) | |
1146 | return cpu; | |
1147 | ||
1148 | return -1; | |
1149 | } | |
1150 | ||
1151 | /* Locks the rq it finds */ | |
1152 | static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) | |
1153 | { | |
1154 | struct rq *later_rq = NULL; | |
1155 | int tries; | |
1156 | int cpu; | |
1157 | ||
1158 | for (tries = 0; tries < DL_MAX_TRIES; tries++) { | |
1159 | cpu = find_later_rq(task); | |
1160 | ||
1161 | if ((cpu == -1) || (cpu == rq->cpu)) | |
1162 | break; | |
1163 | ||
1164 | later_rq = cpu_rq(cpu); | |
1165 | ||
1166 | /* Retry if something changed. */ | |
1167 | if (double_lock_balance(rq, later_rq)) { | |
1168 | if (unlikely(task_rq(task) != rq || | |
1169 | !cpumask_test_cpu(later_rq->cpu, | |
1170 | &task->cpus_allowed) || | |
1171 | task_running(rq, task) || !task->on_rq)) { | |
1172 | double_unlock_balance(rq, later_rq); | |
1173 | later_rq = NULL; | |
1174 | break; | |
1175 | } | |
1176 | } | |
1177 | ||
1178 | /* | |
1179 | * If the rq we found has no -deadline task, or | |
1180 | * its earliest one has a later deadline than our | |
1181 | * task, the rq is a good one. | |
1182 | */ | |
1183 | if (!later_rq->dl.dl_nr_running || | |
1184 | dl_time_before(task->dl.deadline, | |
1185 | later_rq->dl.earliest_dl.curr)) | |
1186 | break; | |
1187 | ||
1188 | /* Otherwise we try again. */ | |
1189 | double_unlock_balance(rq, later_rq); | |
1190 | later_rq = NULL; | |
1191 | } | |
1192 | ||
1193 | return later_rq; | |
1194 | } | |
1195 | ||
1196 | static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) | |
1197 | { | |
1198 | struct task_struct *p; | |
1199 | ||
1200 | if (!has_pushable_dl_tasks(rq)) | |
1201 | return NULL; | |
1202 | ||
1203 | p = rb_entry(rq->dl.pushable_dl_tasks_leftmost, | |
1204 | struct task_struct, pushable_dl_tasks); | |
1205 | ||
1206 | BUG_ON(rq->cpu != task_cpu(p)); | |
1207 | BUG_ON(task_current(rq, p)); | |
1208 | BUG_ON(p->nr_cpus_allowed <= 1); | |
1209 | ||
1210 | BUG_ON(!p->se.on_rq); | |
1211 | BUG_ON(!dl_task(p)); | |
1212 | ||
1213 | return p; | |
1214 | } | |
1215 | ||
1216 | /* | |
1217 | * See if the non-running -deadline tasks on this rq |
1218 | * can be sent to some other CPU where they can preempt | |
1219 | * and start executing. | |
1220 | */ | |
1221 | static int push_dl_task(struct rq *rq) | |
1222 | { | |
1223 | struct task_struct *next_task; | |
1224 | struct rq *later_rq; | |
1225 | ||
1226 | if (!rq->dl.overloaded) | |
1227 | return 0; | |
1228 | ||
1229 | next_task = pick_next_pushable_dl_task(rq); | |
1230 | if (!next_task) | |
1231 | return 0; | |
1232 | ||
1233 | retry: | |
1234 | if (unlikely(next_task == rq->curr)) { | |
1235 | WARN_ON(1); | |
1236 | return 0; | |
1237 | } | |
1238 | ||
1239 | /* | |
1240 | * If next_task preempts rq->curr, and rq->curr | |
1241 | * can move away, it makes sense to just reschedule | |
1242 | * without going further in pushing next_task. | |
1243 | */ | |
1244 | if (dl_task(rq->curr) && | |
1245 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && | |
1246 | rq->curr->nr_cpus_allowed > 1) { | |
1247 | resched_task(rq->curr); | |
1248 | return 0; | |
1249 | } | |
1250 | ||
1251 | /* We might release rq lock */ | |
1252 | get_task_struct(next_task); | |
1253 | ||
1254 | /* Will lock the rq it'll find */ | |
1255 | later_rq = find_lock_later_rq(next_task, rq); | |
1256 | if (!later_rq) { | |
1257 | struct task_struct *task; | |
1258 | ||
1259 | /* | |
1260 | * We must check all this again, since | |
1261 | * find_lock_later_rq releases rq->lock and it is | |
1262 | * then possible that next_task has migrated. | |
1263 | */ | |
1264 | task = pick_next_pushable_dl_task(rq); | |
1265 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | |
1266 | /* | |
1267 | * The task is still there. We don't try | |
1268 | * again, some other cpu will pull it when ready. | |
1269 | */ | |
1270 | dequeue_pushable_dl_task(rq, next_task); | |
1271 | goto out; | |
1272 | } | |
1273 | ||
1274 | if (!task) | |
1275 | /* No more tasks */ | |
1276 | goto out; | |
1277 | ||
1278 | put_task_struct(next_task); | |
1279 | next_task = task; | |
1280 | goto retry; | |
1281 | } | |
1282 | ||
1283 | deactivate_task(rq, next_task, 0); | |
1284 | set_task_cpu(next_task, later_rq->cpu); | |
1285 | activate_task(later_rq, next_task, 0); | |
1286 | ||
1287 | resched_task(later_rq->curr); | |
1288 | ||
1289 | double_unlock_balance(rq, later_rq); | |
1290 | ||
1291 | out: | |
1292 | put_task_struct(next_task); | |
1293 | ||
1294 | return 1; | |
1295 | } | |
1296 | ||
1297 | static void push_dl_tasks(struct rq *rq) | |
1298 | { | |
1299 | /* Terminates as it moves a -deadline task */ | |
1300 | while (push_dl_task(rq)) | |
1301 | ; | |
1302 | } |
1303 | ||
1304 | static int pull_dl_task(struct rq *this_rq) |
1305 | { | |
1306 | int this_cpu = this_rq->cpu, ret = 0, cpu; | |
1307 | struct task_struct *p; | |
1308 | struct rq *src_rq; | |
1309 | u64 dmin = LONG_MAX; | |
1310 | ||
1311 | if (likely(!dl_overloaded(this_rq))) | |
1312 | return 0; | |
1313 | ||
1314 | /* | |
1315 | * Match the barrier from dl_set_overload(); this guarantees that if we |
1316 | * see overloaded we must also see the dlo_mask bit. | |
1317 | */ | |
1318 | smp_rmb(); | |
1319 | ||
1320 | for_each_cpu(cpu, this_rq->rd->dlo_mask) { | |
1321 | if (this_cpu == cpu) | |
1322 | continue; | |
1323 | ||
1324 | src_rq = cpu_rq(cpu); | |
1325 | ||
1326 | /* | |
1327 | * It looks racy, and it is! However, as in sched_rt.c, |
1328 | * we are fine with this. | |
1329 | */ | |
1330 | if (this_rq->dl.dl_nr_running && | |
1331 | dl_time_before(this_rq->dl.earliest_dl.curr, | |
1332 | src_rq->dl.earliest_dl.next)) | |
1333 | continue; | |
1334 | ||
1335 | /* Might drop this_rq->lock */ | |
1336 | double_lock_balance(this_rq, src_rq); | |
1337 | ||
1338 | /* | |
1339 | * If there are no more pullable tasks on the | |
1340 | * rq, we're done with it. | |
1341 | */ | |
1342 | if (src_rq->dl.dl_nr_running <= 1) | |
1343 | goto skip; | |
1344 | ||
1345 | p = pick_next_earliest_dl_task(src_rq, this_cpu); | |
1346 | ||
1347 | /* | |
1348 | * We found a task to be pulled if: | |
1349 | * - it preempts our current (if there's one), | |
1350 | * - it will preempt the last one we pulled (if any). | |
1351 | */ | |
1352 | if (p && dl_time_before(p->dl.deadline, dmin) && | |
1353 | (!this_rq->dl.dl_nr_running || | |
1354 | dl_time_before(p->dl.deadline, | |
1355 | this_rq->dl.earliest_dl.curr))) { | |
1356 | WARN_ON(p == src_rq->curr); | |
1357 | WARN_ON(!p->se.on_rq); | |
1358 | ||
1359 | /* | |
1360 | * Then we pull iff p has actually an earlier | |
1361 | * deadline than the current task of its runqueue. | |
1362 | */ | |
1363 | if (dl_time_before(p->dl.deadline, | |
1364 | src_rq->curr->dl.deadline)) | |
1365 | goto skip; | |
1366 | ||
1367 | ret = 1; | |
1368 | ||
1369 | deactivate_task(src_rq, p, 0); | |
1370 | set_task_cpu(p, this_cpu); | |
1371 | activate_task(this_rq, p, 0); | |
1372 | dmin = p->dl.deadline; | |
1373 | ||
1374 | /* Is there any other task even earlier? */ | |
1375 | } | |
1376 | skip: | |
1377 | double_unlock_balance(this_rq, src_rq); | |
1378 | } | |
1379 | ||
1380 | return ret; | |
1381 | } | |
1382 | ||
1383 | static void pre_schedule_dl(struct rq *rq, struct task_struct *prev) | |
1384 | { | |
1385 | /* Try to pull other tasks here */ | |
1386 | if (dl_task(prev)) | |
1387 | pull_dl_task(rq); | |
1388 | } | |
1389 | ||
1390 | static void post_schedule_dl(struct rq *rq) | |
1391 | { | |
1392 | push_dl_tasks(rq); | |
1393 | } | |
1394 | ||
1395 | /* | |
1396 | * Since the task is not running and a reschedule is not going to happen | |
1397 | * anytime soon on its runqueue, we try pushing it away now. | |
1398 | */ | |
1399 | static void task_woken_dl(struct rq *rq, struct task_struct *p) | |
1400 | { | |
1401 | if (!task_running(rq, p) && | |
1402 | !test_tsk_need_resched(rq->curr) && | |
1403 | has_pushable_dl_tasks(rq) && | |
1404 | p->nr_cpus_allowed > 1 && | |
1405 | dl_task(rq->curr) && | |
1406 | (rq->curr->nr_cpus_allowed < 2 || | |
1407 | dl_entity_preempt(&rq->curr->dl, &p->dl))) { | |
1408 | push_dl_tasks(rq); | |
1409 | } | |
1410 | } | |
1411 | ||
1412 | static void set_cpus_allowed_dl(struct task_struct *p, | |
1413 | const struct cpumask *new_mask) | |
1414 | { | |
1415 | struct rq *rq; | |
1416 | int weight; | |
1417 | ||
1418 | BUG_ON(!dl_task(p)); | |
1419 | ||
1420 | /* | |
1421 | * Update only if the task is actually running (i.e., | |
1422 | * it is on the rq AND it is not throttled). | |
1423 | */ | |
1424 | if (!on_dl_rq(&p->dl)) | |
1425 | return; | |
1426 | ||
1427 | weight = cpumask_weight(new_mask); | |
1428 | ||
1429 | /* | |
1430 | * Only update if the process changed whether or not it |
1431 | * can migrate. |
1432 | */ | |
1433 | if ((p->nr_cpus_allowed > 1) == (weight > 1)) | |
1434 | return; | |
1435 | ||
1436 | rq = task_rq(p); | |
1437 | ||
1438 | /* | |
1439 | * The process used to be able to migrate OR it can now migrate | |
1440 | */ | |
1441 | if (weight <= 1) { | |
1442 | if (!task_current(rq, p)) | |
1443 | dequeue_pushable_dl_task(rq, p); | |
1444 | BUG_ON(!rq->dl.dl_nr_migratory); | |
1445 | rq->dl.dl_nr_migratory--; | |
1446 | } else { | |
1447 | if (!task_current(rq, p)) | |
1448 | enqueue_pushable_dl_task(rq, p); | |
1449 | rq->dl.dl_nr_migratory++; | |
1450 | } | |
1451 | ||
1452 | update_dl_migration(&rq->dl); | |
1453 | } | |
1454 | ||
1455 | /* Assumes rq->lock is held */ | |
1456 | static void rq_online_dl(struct rq *rq) | |
1457 | { | |
1458 | if (rq->dl.overloaded) | |
1459 | dl_set_overload(rq); | |
1460 | } | |
1461 | ||
1462 | /* Assumes rq->lock is held */ | |
1463 | static void rq_offline_dl(struct rq *rq) | |
1464 | { | |
1465 | if (rq->dl.overloaded) | |
1466 | dl_clear_overload(rq); | |
1467 | } | |
1468 | ||
1469 | void init_sched_dl_class(void) | |
1470 | { | |
1471 | unsigned int i; | |
1472 | ||
1473 | for_each_possible_cpu(i) | |
1474 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i), | |
1475 | GFP_KERNEL, cpu_to_node(i)); | |
1476 | } | |
1477 | ||
1478 | #endif /* CONFIG_SMP */ | |
1479 | ||
1480 | static void switched_from_dl(struct rq *rq, struct task_struct *p) |
1481 | { | |
1482 | if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy)) |
1483 | hrtimer_try_to_cancel(&p->dl.dl_timer); |
1484 | |
1485 | #ifdef CONFIG_SMP | |
1486 | /* | |
1487 | * Since this might be the only -deadline task on the rq, | |
1488 | * this is the right place to try to pull some other one | |
1489 | * from an overloaded cpu, if any. | |
1490 | */ | |
1491 | if (!rq->dl.dl_nr_running) | |
1492 | pull_dl_task(rq); | |
1493 | #endif | |
1494 | } |
1495 | ||
1496 | /* |
1497 | * When switching to -deadline, we may overload the rq, then | |
1498 | * we try to push someone off, if possible. | |
1499 | */ | |
1500 | static void switched_to_dl(struct rq *rq, struct task_struct *p) |
1501 | { | |
1502 | int check_resched = 1; |
1503 | ||
1504 | /* |
1505 | * If p is throttled, don't consider the possibility | |
1506 | * of preempting rq->curr, the check will be done right | |
1507 | * after its runtime will get replenished. | |
1508 | */ | |
1509 | if (unlikely(p->dl.dl_throttled)) | |
1510 | return; | |
1511 | ||
1512 | if (p->on_rq || rq->curr != p) { | |
1513 | #ifdef CONFIG_SMP |
1514 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) | |
1515 | /* Only reschedule if pushing failed */ | |
1516 | check_resched = 0; | |
1517 | #endif /* CONFIG_SMP */ | |
1518 | if (check_resched && task_has_dl_policy(rq->curr)) | |
1519 | check_preempt_curr_dl(rq, p, 0); |
1520 | } |
1521 | } | |
1522 | ||
1523 | /* |
1524 | * If the scheduling parameters of a -deadline task changed, | |
1525 | * a push or pull operation might be needed. | |
1526 | */ | |
1527 | static void prio_changed_dl(struct rq *rq, struct task_struct *p, |
1528 | int oldprio) | |
1529 | { | |
1530 | if (p->on_rq || rq->curr == p) { |
1531 | #ifdef CONFIG_SMP |
1532 | /* |
1533 | * This might be too much, but unfortunately | |
1534 | * we don't have the old deadline value, and | |
1535 | * we can't tell whether the task is increasing |
1536 | * or lowering its prio, so... | |
1537 | */ | |
1538 | if (!rq->dl.overloaded) | |
1539 | pull_dl_task(rq); | |
1540 | ||
1541 | /* | |
1542 | * If we now have an earlier deadline task than p, |
1543 | * then reschedule, provided p is still on this | |
1544 | * runqueue. | |
1545 | */ | |
1546 | if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && | |
1547 | rq->curr == p) | |
1548 | resched_task(p); | |
1549 | #else | |
1550 | /* | |
1551 | * Again, we don't know if p has an earlier |
1552 | * or later deadline, so let's blindly set a | |
1553 | * (maybe not needed) rescheduling point. | |
1554 | */ | |
1555 | resched_task(p); | |
1556 | #endif /* CONFIG_SMP */ | |
1557 | } else | |
1558 | switched_to_dl(rq, p); | |
1559 | } |
1560 | |
1561 | const struct sched_class dl_sched_class = { | |
1562 | .next = &rt_sched_class, | |
1563 | .enqueue_task = enqueue_task_dl, | |
1564 | .dequeue_task = dequeue_task_dl, | |
1565 | .yield_task = yield_task_dl, | |
1566 | ||
1567 | .check_preempt_curr = check_preempt_curr_dl, | |
1568 | ||
1569 | .pick_next_task = pick_next_task_dl, | |
1570 | .put_prev_task = put_prev_task_dl, | |
1571 | ||
1572 | #ifdef CONFIG_SMP | |
1573 | .select_task_rq = select_task_rq_dl, | |
1574 | .set_cpus_allowed = set_cpus_allowed_dl, |
1575 | .rq_online = rq_online_dl, | |
1576 | .rq_offline = rq_offline_dl, | |
1577 | .pre_schedule = pre_schedule_dl, | |
1578 | .post_schedule = post_schedule_dl, | |
1579 | .task_woken = task_woken_dl, | |
1580 | #endif |
1581 | ||
1582 | .set_curr_task = set_curr_task_dl, | |
1583 | .task_tick = task_tick_dl, | |
1584 | .task_fork = task_fork_dl, | |
1585 | .task_dead = task_dead_dl, | |
1586 | ||
1587 | .prio_changed = prio_changed_dl, | |
1588 | .switched_from = switched_from_dl, | |
1589 | .switched_to = switched_to_dl, | |
1590 | }; |