Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r-- | kernel/sched/deadline.c | 1639 |
1 files changed, 1639 insertions, 0 deletions
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
new file mode 100644
index 000000000000..6e79b3faa4cd
--- /dev/null
+++ b/kernel/sched/deadline.c
@@ -0,0 +1,1639 @@ | |||
1 | /* | ||
2 | * Deadline Scheduling Class (SCHED_DEADLINE) | ||
3 | * | ||
4 | * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS). | ||
5 | * | ||
6 | * Tasks that periodically execute their instances for less than their | ||
7 | * runtime won't miss any of their deadlines. | ||
8 | * Tasks that are not periodic or sporadic or that try to execute more | ||
9 | * than their reserved bandwidth will be slowed down (and may potentially | ||
10 | * miss some of their deadlines), and won't affect any other task. | ||
11 | * | ||
12 | * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>, | ||
13 | * Juri Lelli <juri.lelli@gmail.com>, | ||
14 | * Michael Trimarchi <michael@amarulasolutions.com>, | ||
15 | * Fabio Checconi <fchecconi@gmail.com> | ||
16 | */ | ||
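As a quick illustration of the task model described in the header above (an editorial sketch, not part of the diff): each -deadline task is described by a worst-case runtime, a relative deadline and a period, and the bandwidth it reserves is runtime/period. None of the names below are kernel APIs; they are made up for the example.

/* Hypothetical illustration only; none of these names are kernel APIs. */
#include <stdio.h>

struct dl_params {
	unsigned long long runtime;  /* worst-case execution time per instance (ns) */
	unsigned long long deadline; /* relative deadline (ns) */
	unsigned long long period;   /* minimum inter-arrival time (ns) */
};

/* The sum of the runtime/period ratios must not exceed the available capacity. */
static int admits(const struct dl_params *set, int n, double capacity)
{
	double total = 0.0;
	int i;

	for (i = 0; i < n; i++)
		total += (double)set[i].runtime / (double)set[i].period;
	return total <= capacity;
}

int main(void)
{
	struct dl_params set[] = {
		{ 10000000ULL, 30000000ULL, 100000000ULL },	/* 10% of a CPU */
		{ 50000000ULL, 80000000ULL, 100000000ULL },	/* 50% of a CPU */
	};

	printf("admitted: %d\n", admits(set, 2, 1.0));	/* 0.6 <= 1.0 -> 1 */
	return 0;
}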
17 | #include "sched.h" | ||
18 | |||
19 | #include <linux/slab.h> | ||
20 | |||
21 | struct dl_bandwidth def_dl_bandwidth; | ||
22 | |||
23 | static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) | ||
24 | { | ||
25 | return container_of(dl_se, struct task_struct, dl); | ||
26 | } | ||
27 | |||
28 | static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq) | ||
29 | { | ||
30 | return container_of(dl_rq, struct rq, dl); | ||
31 | } | ||
32 | |||
33 | static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se) | ||
34 | { | ||
35 | struct task_struct *p = dl_task_of(dl_se); | ||
36 | struct rq *rq = task_rq(p); | ||
37 | |||
38 | return &rq->dl; | ||
39 | } | ||
40 | |||
41 | static inline int on_dl_rq(struct sched_dl_entity *dl_se) | ||
42 | { | ||
43 | return !RB_EMPTY_NODE(&dl_se->rb_node); | ||
44 | } | ||
45 | |||
46 | static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) | ||
47 | { | ||
48 | struct sched_dl_entity *dl_se = &p->dl; | ||
49 | |||
50 | return dl_rq->rb_leftmost == &dl_se->rb_node; | ||
51 | } | ||
52 | |||
53 | void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) | ||
54 | { | ||
55 | raw_spin_lock_init(&dl_b->dl_runtime_lock); | ||
56 | dl_b->dl_period = period; | ||
57 | dl_b->dl_runtime = runtime; | ||
58 | } | ||
59 | |||
60 | extern unsigned long to_ratio(u64 period, u64 runtime); | ||
61 | |||
62 | void init_dl_bw(struct dl_bw *dl_b) | ||
63 | { | ||
64 | raw_spin_lock_init(&dl_b->lock); | ||
65 | raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock); | ||
66 | if (global_rt_runtime() == RUNTIME_INF) | ||
67 | dl_b->bw = -1; | ||
68 | else | ||
69 | dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime()); | ||
70 | raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock); | ||
71 | dl_b->total_bw = 0; | ||
72 | } | ||
73 | |||
74 | void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq) | ||
75 | { | ||
76 | dl_rq->rb_root = RB_ROOT; | ||
77 | |||
78 | #ifdef CONFIG_SMP | ||
79 | /* zero means no -deadline tasks */ | ||
80 | dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0; | ||
81 | |||
82 | dl_rq->dl_nr_migratory = 0; | ||
83 | dl_rq->overloaded = 0; | ||
84 | dl_rq->pushable_dl_tasks_root = RB_ROOT; | ||
85 | #else | ||
86 | init_dl_bw(&dl_rq->dl_bw); | ||
87 | #endif | ||
88 | } | ||
89 | |||
90 | #ifdef CONFIG_SMP | ||
91 | |||
92 | static inline int dl_overloaded(struct rq *rq) | ||
93 | { | ||
94 | return atomic_read(&rq->rd->dlo_count); | ||
95 | } | ||
96 | |||
97 | static inline void dl_set_overload(struct rq *rq) | ||
98 | { | ||
99 | if (!rq->online) | ||
100 | return; | ||
101 | |||
102 | cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask); | ||
103 | /* | ||
104 | * Must be visible before the overload count is | ||
105 | * set (as in sched_rt.c). | ||
106 | * | ||
107 | * Matched by the barrier in pull_dl_task(). | ||
108 | */ | ||
109 | smp_wmb(); | ||
110 | atomic_inc(&rq->rd->dlo_count); | ||
111 | } | ||
112 | |||
113 | static inline void dl_clear_overload(struct rq *rq) | ||
114 | { | ||
115 | if (!rq->online) | ||
116 | return; | ||
117 | |||
118 | atomic_dec(&rq->rd->dlo_count); | ||
119 | cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask); | ||
120 | } | ||
121 | |||
122 | static void update_dl_migration(struct dl_rq *dl_rq) | ||
123 | { | ||
124 | if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) { | ||
125 | if (!dl_rq->overloaded) { | ||
126 | dl_set_overload(rq_of_dl_rq(dl_rq)); | ||
127 | dl_rq->overloaded = 1; | ||
128 | } | ||
129 | } else if (dl_rq->overloaded) { | ||
130 | dl_clear_overload(rq_of_dl_rq(dl_rq)); | ||
131 | dl_rq->overloaded = 0; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | ||
136 | { | ||
137 | struct task_struct *p = dl_task_of(dl_se); | ||
138 | |||
139 | if (p->nr_cpus_allowed > 1) | ||
140 | dl_rq->dl_nr_migratory++; | ||
141 | |||
142 | update_dl_migration(dl_rq); | ||
143 | } | ||
144 | |||
145 | static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | ||
146 | { | ||
147 | struct task_struct *p = dl_task_of(dl_se); | ||
148 | |||
149 | if (p->nr_cpus_allowed > 1) | ||
150 | dl_rq->dl_nr_migratory--; | ||
151 | |||
152 | update_dl_migration(dl_rq); | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * The list of pushable -deadline task is not a plist, like in | ||
157 | * sched_rt.c, it is an rb-tree with tasks ordered by deadline. | ||
158 | */ | ||
159 | static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) | ||
160 | { | ||
161 | struct dl_rq *dl_rq = &rq->dl; | ||
162 | struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node; | ||
163 | struct rb_node *parent = NULL; | ||
164 | struct task_struct *entry; | ||
165 | int leftmost = 1; | ||
166 | |||
167 | BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); | ||
168 | |||
169 | while (*link) { | ||
170 | parent = *link; | ||
171 | entry = rb_entry(parent, struct task_struct, | ||
172 | pushable_dl_tasks); | ||
173 | if (dl_entity_preempt(&p->dl, &entry->dl)) | ||
174 | link = &parent->rb_left; | ||
175 | else { | ||
176 | link = &parent->rb_right; | ||
177 | leftmost = 0; | ||
178 | } | ||
179 | } | ||
180 | |||
181 | if (leftmost) | ||
182 | dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; | ||
183 | |||
184 | rb_link_node(&p->pushable_dl_tasks, parent, link); | ||
185 | rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | ||
186 | } | ||
187 | |||
188 | static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) | ||
189 | { | ||
190 | struct dl_rq *dl_rq = &rq->dl; | ||
191 | |||
192 | if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) | ||
193 | return; | ||
194 | |||
195 | if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) { | ||
196 | struct rb_node *next_node; | ||
197 | |||
198 | next_node = rb_next(&p->pushable_dl_tasks); | ||
199 | dl_rq->pushable_dl_tasks_leftmost = next_node; | ||
200 | } | ||
201 | |||
202 | rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | ||
203 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | ||
204 | } | ||
205 | |||
206 | static inline int has_pushable_dl_tasks(struct rq *rq) | ||
207 | { | ||
208 | return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root); | ||
209 | } | ||
210 | |||
211 | static int push_dl_task(struct rq *rq); | ||
212 | |||
213 | #else | ||
214 | |||
215 | static inline | ||
216 | void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) | ||
217 | { | ||
218 | } | ||
219 | |||
220 | static inline | ||
221 | void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) | ||
222 | { | ||
223 | } | ||
224 | |||
225 | static inline | ||
226 | void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | ||
227 | { | ||
228 | } | ||
229 | |||
230 | static inline | ||
231 | void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | ||
232 | { | ||
233 | } | ||
234 | |||
235 | #endif /* CONFIG_SMP */ | ||
236 | |||
237 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); | ||
238 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags); | ||
239 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | ||
240 | int flags); | ||
241 | |||
242 | /* | ||
243 | * We are being explicitly informed that a new instance is starting, | ||
244 | * and this means that: | ||
245 | * - the absolute deadline of the entity has to be placed at | ||
246 | * current time + relative deadline; | ||
247 | * - the runtime of the entity has to be set to the maximum value. | ||
248 | * | ||
249 | * The capability of specifying such an event is useful whenever a -deadline | ||
250 | * entity wants to (try to!) synchronize its behaviour with the scheduler's, | ||
251 | * and to (try to!) reconcile itself with its own scheduling | ||
252 | * parameters. | ||
253 | */ | ||
254 | static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se, | ||
255 | struct sched_dl_entity *pi_se) | ||
256 | { | ||
257 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | ||
258 | struct rq *rq = rq_of_dl_rq(dl_rq); | ||
259 | |||
260 | WARN_ON(!dl_se->dl_new || dl_se->dl_throttled); | ||
261 | |||
262 | /* | ||
263 | * We use the regular wall clock time to set deadlines in the | ||
264 | * future; in fact, we must consider execution overheads (time | ||
265 | * spent on hardirq context, etc.). | ||
266 | */ | ||
267 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | ||
268 | dl_se->runtime = pi_se->dl_runtime; | ||
269 | dl_se->dl_new = 0; | ||
270 | } | ||
271 | |||
272 | /* | ||
273 | * Pure Earliest Deadline First (EDF) scheduling does not deal with the | ||
274 | * possibility of an entity lasting longer than what it declared, and thus | ||
275 | * exhausting its runtime. | ||
276 | * | ||
277 | * Here we are interested in making runtime overrun possible, but we do | ||
278 | * not want an entity which is misbehaving to affect the scheduling of all | ||
279 | * other entities. | ||
280 | * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS) | ||
281 | * is used, in order to confine each entity within its own bandwidth. | ||
282 | * | ||
283 | * This function deals exactly with that, and ensures that when the runtime | ||
284 | * of an entity is replenished, its deadline is also postponed. That ensures | ||
285 | * the overrunning entity can't interfere with other entities in the system and | ||
286 | * can't make them miss their deadlines. Typical reasons for this kind of | ||
287 | * overrun are an entity voluntarily trying to exceed its runtime, or an | ||
288 | * underestimate of it during sched_setscheduler_ex(). | ||
289 | */ | ||
290 | static void replenish_dl_entity(struct sched_dl_entity *dl_se, | ||
291 | struct sched_dl_entity *pi_se) | ||
292 | { | ||
293 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | ||
294 | struct rq *rq = rq_of_dl_rq(dl_rq); | ||
295 | |||
296 | BUG_ON(pi_se->dl_runtime <= 0); | ||
297 | |||
298 | /* | ||
299 | * This could be the case for a !-dl task that is boosted. | ||
300 | * Just go with full inherited parameters. | ||
301 | */ | ||
302 | if (dl_se->dl_deadline == 0) { | ||
303 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | ||
304 | dl_se->runtime = pi_se->dl_runtime; | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * We keep moving the deadline away until we get some | ||
309 | * available runtime for the entity. This ensures correct | ||
310 | * handling of situations where the runtime overrun is | ||
311 | * arbitrarily large. | ||
312 | */ | ||
313 | while (dl_se->runtime <= 0) { | ||
314 | dl_se->deadline += pi_se->dl_period; | ||
315 | dl_se->runtime += pi_se->dl_runtime; | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * At this point, the deadline really should be "in | ||
320 | * the future" with respect to rq->clock. If it's | ||
321 | * not, we are, for some reason, lagging too much! | ||
322 | * Anyway, after having warned userspace about that, | ||
323 | * we still try to keep things running by | ||
324 | * resetting the deadline and the budget of the | ||
325 | * entity. | ||
326 | */ | ||
327 | if (dl_time_before(dl_se->deadline, rq_clock(rq))) { | ||
328 | static bool lag_once = false; | ||
329 | |||
330 | if (!lag_once) { | ||
331 | lag_once = true; | ||
332 | printk_sched("sched: DL replenish lagged too much\n"); | ||
333 | } | ||
334 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | ||
335 | dl_se->runtime = pi_se->dl_runtime; | ||
336 | } | ||
337 | } | ||
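To see what the replenishment loop above does, here is a standalone sketch with made-up numbers (not kernel code): an entity that overran its budget by 25 time units, with runtime 10 and period 30, needs three replenishments before any budget is available again, and its deadline moves three periods into the future.

/* Standalone sketch of the CBS replenishment loop; values are made up. */
#include <stdio.h>

int main(void)
{
	long long runtime = -25;		/* overran the budget by 25 units */
	unsigned long long deadline = 100;	/* current absolute deadline      */
	const unsigned long long dl_runtime = 10, dl_period = 30;

	/* Keep postponing the deadline until some runtime is available again. */
	while (runtime <= 0) {
		deadline += dl_period;
		runtime += dl_runtime;
	}

	/* -25 -> -15 -> -5 -> 5: deadline moved three periods, 5 units left. */
	printf("deadline=%llu runtime=%lld\n", deadline, runtime);	/* 190 5 */
	return 0;
}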
338 | |||
339 | /* | ||
340 | * Here we check if --at time t-- an entity (which is probably being | ||
341 | * [re]activated or, in general, enqueued) can use its remaining runtime | ||
342 | * and its current deadline _without_ exceeding the bandwidth it is | ||
343 | * assigned (function returns true if it can't). We are in fact applying | ||
344 | * one of the CBS rules: when a task wakes up, if the residual runtime | ||
345 | * over residual deadline fits within the allocated bandwidth, then we | ||
346 | * can keep the current (absolute) deadline and residual budget without | ||
347 | * disrupting the schedulability of the system. Otherwise, we should | ||
348 | * refill the runtime and set the deadline a period in the future, | ||
349 | * because keeping the current (absolute) deadline of the task would | ||
350 | * result in breaking guarantees promised to other tasks (refer to | ||
351 | * Documentation/scheduler/sched-deadline.txt for more information). | ||
352 | * | ||
353 | * This function returns true if: | ||
354 | * | ||
355 | * runtime / (deadline - t) > dl_runtime / dl_period , | ||
356 | * | ||
357 | * IOW we can't recycle current parameters. | ||
358 | * | ||
359 | * Notice that the bandwidth check is done against the period. For | ||
360 | * tasks with deadline equal to period this is the same as using | ||
361 | * dl_deadline instead of dl_period in the equation above. | ||
362 | */ | ||
363 | static bool dl_entity_overflow(struct sched_dl_entity *dl_se, | ||
364 | struct sched_dl_entity *pi_se, u64 t) | ||
365 | { | ||
366 | u64 left, right; | ||
367 | |||
368 | /* | ||
369 | * left and right are the two sides of the equation above, | ||
370 | * after a bit of shuffling to use multiplications instead | ||
371 | * of divisions. | ||
372 | * | ||
373 | * Note that none of the time values involved in the two | ||
374 | * multiplications are absolute: dl_deadline and dl_runtime | ||
375 | * are the relative deadline and the maximum runtime of each | ||
376 | * instance, runtime is the runtime left for the last instance | ||
377 | * and (deadline - t), since t is rq->clock, is the time left | ||
378 | * to the (absolute) deadline. Even if overflowing the u64 type | ||
379 | * is very unlikely to occur in both cases, here we scale down | ||
380 | * as we want to avoid that risk altogether. Scaling down by 10 | ||
381 | * bits means that we reduce the granularity to ~1us. We are fine with it, | ||
382 | * since this is only a true/false check and, anyway, thinking | ||
383 | * of anything below microsecond resolution is actually fiction | ||
384 | * (but still we want to give the user that illusion >;). | ||
385 | */ | ||
386 | left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); | ||
387 | right = ((dl_se->deadline - t) >> DL_SCALE) * | ||
388 | (pi_se->dl_runtime >> DL_SCALE); | ||
389 | |||
390 | return dl_time_before(right, left); | ||
391 | } | ||
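A worked example of the check above, as a standalone sketch (DL_SCALE is assumed to be 10, the shift used elsewhere in the series, and a plain '<' stands in for dl_time_before() since no wraparound is involved): an entity with 8 units of residual runtime and 20 units left to its deadline would demand 40% of a CPU, more than the 10% it reserved, so its current parameters cannot be recycled.

/* Standalone illustration of the bandwidth overflow test; values are made up. */
#include <stdio.h>

#define DL_SCALE	10	/* assumed value of the scaling shift */

int main(void)
{
	unsigned long long dl_runtime = 10ULL << 20;	/* reserved runtime   */
	unsigned long long dl_period  = 100ULL << 20;	/* reservation period */
	unsigned long long runtime    = 8ULL << 20;	/* residual budget    */
	unsigned long long laxity     = 20ULL << 20;	/* deadline - t       */

	unsigned long long left  = (dl_period >> DL_SCALE) * (runtime >> DL_SCALE);
	unsigned long long right = (laxity >> DL_SCALE) * (dl_runtime >> DL_SCALE);

	/* runtime/laxity = 40% demanded vs dl_runtime/dl_period = 10% reserved. */
	printf("overflow=%d\n", right < left);	/* prints 1 */
	return 0;
}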
392 | |||
393 | /* | ||
394 | * When a -deadline entity is queued back on the runqueue, its runtime and | ||
395 | * deadline might need updating. | ||
396 | * | ||
397 | * The policy here is that we update the deadline of the entity only if: | ||
398 | * - the current deadline is in the past, | ||
399 | * - using the remaining runtime with the current deadline would make | ||
400 | * the entity exceed its bandwidth. | ||
401 | */ | ||
402 | static void update_dl_entity(struct sched_dl_entity *dl_se, | ||
403 | struct sched_dl_entity *pi_se) | ||
404 | { | ||
405 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | ||
406 | struct rq *rq = rq_of_dl_rq(dl_rq); | ||
407 | |||
408 | /* | ||
409 | * The arrival of a new instance needs special treatment, i.e., | ||
410 | * the actual scheduling parameters have to be "renewed". | ||
411 | */ | ||
412 | if (dl_se->dl_new) { | ||
413 | setup_new_dl_entity(dl_se, pi_se); | ||
414 | return; | ||
415 | } | ||
416 | |||
417 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) || | ||
418 | dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) { | ||
419 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | ||
420 | dl_se->runtime = pi_se->dl_runtime; | ||
421 | } | ||
422 | } | ||
423 | |||
424 | /* | ||
425 | * If the entity depleted all its runtime, and if we want it to sleep | ||
426 | * while waiting for some new execution time to become available, we | ||
427 | * set the bandwidth enforcement timer to the replenishment instant | ||
428 | * and try to activate it. | ||
429 | * | ||
430 | * Notice that it is important for the caller to know if the timer | ||
431 | * actually started or not (i.e., the replenishment instant is in | ||
432 | * the future or in the past). | ||
433 | */ | ||
434 | static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted) | ||
435 | { | ||
436 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | ||
437 | struct rq *rq = rq_of_dl_rq(dl_rq); | ||
438 | ktime_t now, act; | ||
439 | ktime_t soft, hard; | ||
440 | unsigned long range; | ||
441 | s64 delta; | ||
442 | |||
443 | if (boosted) | ||
444 | return 0; | ||
445 | /* | ||
446 | * We want the timer to fire at the deadline, taking into account | ||
447 | * that the deadline is expressed in rq->clock time and not in | ||
448 | * the hrtimer's time base. | ||
449 | */ | ||
450 | act = ns_to_ktime(dl_se->deadline); | ||
451 | now = hrtimer_cb_get_time(&dl_se->dl_timer); | ||
452 | delta = ktime_to_ns(now) - rq_clock(rq); | ||
453 | act = ktime_add_ns(act, delta); | ||
454 | |||
455 | /* | ||
456 | * If the expiry time already passed, e.g., because the value | ||
457 | * chosen as the deadline is too small, don't even try to | ||
458 | * start the timer in the past! | ||
459 | */ | ||
460 | if (ktime_us_delta(act, now) < 0) | ||
461 | return 0; | ||
462 | |||
463 | hrtimer_set_expires(&dl_se->dl_timer, act); | ||
464 | |||
465 | soft = hrtimer_get_softexpires(&dl_se->dl_timer); | ||
466 | hard = hrtimer_get_expires(&dl_se->dl_timer); | ||
467 | range = ktime_to_ns(ktime_sub(hard, soft)); | ||
468 | __hrtimer_start_range_ns(&dl_se->dl_timer, soft, | ||
469 | range, HRTIMER_MODE_ABS, 0); | ||
470 | |||
471 | return hrtimer_active(&dl_se->dl_timer); | ||
472 | } | ||
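The only subtlety in start_dl_timer() is the change of time base: the replenishment instant is expressed in rq->clock time, while the hrtimer expires against its own base, so the deadline is shifted by the current offset between the two. A hedged, plain-arithmetic sketch with made-up values:

/* Plain-arithmetic sketch of the clock-base adjustment; values are made up. */
#include <stdio.h>

int main(void)
{
	unsigned long long deadline    = 5000;	/* replenishment instant, rq->clock ns */
	unsigned long long rq_clock    = 3000;	/* current rq->clock                   */
	unsigned long long hrtimer_now = 7500;	/* current hrtimer base time           */

	/* act = deadline + (hrtimer_now - rq_clock): same instant, other base. */
	long long delta = (long long)(hrtimer_now - rq_clock);
	unsigned long long act = deadline + delta;

	/* Fires 2000ns from now, exactly deadline - rq_clock in the future. */
	printf("expires at %llu (in %lld ns)\n", act, (long long)(act - hrtimer_now));
	return 0;
}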
473 | |||
474 | /* | ||
475 | * This is the bandwidth enforcement timer callback. If here, we know | ||
476 | * a task is not on its dl_rq, since the fact that the timer was running | ||
477 | * means the task is throttled and needs a runtime replenishment. | ||
478 | * | ||
479 | * However, what we actually do depends on whether the task is active | ||
480 | * (i.e., it is on its rq) or has been removed from there by a call to | ||
481 | * dequeue_task_dl(). In the former case we must issue the runtime | ||
482 | * replenishment and add the task back to the dl_rq; in the latter, we just | ||
483 | * do nothing but clear dl_throttled, so that runtime and deadline | ||
484 | * updating (and the queueing back to dl_rq) will be done by the | ||
485 | * next call to enqueue_task_dl(). | ||
486 | */ | ||
487 | static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | ||
488 | { | ||
489 | struct sched_dl_entity *dl_se = container_of(timer, | ||
490 | struct sched_dl_entity, | ||
491 | dl_timer); | ||
492 | struct task_struct *p = dl_task_of(dl_se); | ||
493 | struct rq *rq = task_rq(p); | ||
494 | raw_spin_lock(&rq->lock); | ||
495 | |||
496 | /* | ||
497 | * We need to take care of possible races here. In fact, the | ||
498 | * task might have changed its scheduling policy to something | ||
499 | * different from SCHED_DEADLINE or changed its reservation | ||
500 | * parameters (through sched_setscheduler()). | ||
501 | */ | ||
502 | if (!dl_task(p) || dl_se->dl_new) | ||
503 | goto unlock; | ||
504 | |||
505 | sched_clock_tick(); | ||
506 | update_rq_clock(rq); | ||
507 | dl_se->dl_throttled = 0; | ||
508 | if (p->on_rq) { | ||
509 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | ||
510 | if (task_has_dl_policy(rq->curr)) | ||
511 | check_preempt_curr_dl(rq, p, 0); | ||
512 | else | ||
513 | resched_task(rq->curr); | ||
514 | #ifdef CONFIG_SMP | ||
515 | /* | ||
516 | * Queueing this task back might have overloaded the rq, | ||
517 | * check if we need to kick someone away. | ||
518 | */ | ||
519 | if (has_pushable_dl_tasks(rq)) | ||
520 | push_dl_task(rq); | ||
521 | #endif | ||
522 | } | ||
523 | unlock: | ||
524 | raw_spin_unlock(&rq->lock); | ||
525 | |||
526 | return HRTIMER_NORESTART; | ||
527 | } | ||
528 | |||
529 | void init_dl_task_timer(struct sched_dl_entity *dl_se) | ||
530 | { | ||
531 | struct hrtimer *timer = &dl_se->dl_timer; | ||
532 | |||
533 | if (hrtimer_active(timer)) { | ||
534 | hrtimer_try_to_cancel(timer); | ||
535 | return; | ||
536 | } | ||
537 | |||
538 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
539 | timer->function = dl_task_timer; | ||
540 | } | ||
541 | |||
542 | static | ||
543 | int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) | ||
544 | { | ||
545 | int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); | ||
546 | int rorun = dl_se->runtime <= 0; | ||
547 | |||
548 | if (!rorun && !dmiss) | ||
549 | return 0; | ||
550 | |||
551 | /* | ||
552 | * If we are beyond our current deadline and we are still | ||
553 | * executing, then we have already used some of the runtime of | ||
554 | * the next instance. Thus, if we do not account that, we are | ||
555 | * stealing bandwidth from the system at each deadline miss! | ||
556 | */ | ||
557 | if (dmiss) { | ||
558 | dl_se->runtime = rorun ? dl_se->runtime : 0; | ||
559 | dl_se->runtime -= rq_clock(rq) - dl_se->deadline; | ||
560 | } | ||
561 | |||
562 | return 1; | ||
563 | } | ||
564 | |||
565 | extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); | ||
566 | |||
567 | /* | ||
568 | * Update the current task's runtime statistics (provided it is still | ||
569 | * a -deadline task and has not been removed from the dl_rq). | ||
570 | */ | ||
571 | static void update_curr_dl(struct rq *rq) | ||
572 | { | ||
573 | struct task_struct *curr = rq->curr; | ||
574 | struct sched_dl_entity *dl_se = &curr->dl; | ||
575 | u64 delta_exec; | ||
576 | |||
577 | if (!dl_task(curr) || !on_dl_rq(dl_se)) | ||
578 | return; | ||
579 | |||
580 | /* | ||
581 | * Consumed budget is computed considering the time as | ||
582 | * observed by schedulable tasks (excluding time spent | ||
583 | * in hardirq context, etc.). Deadlines are instead | ||
584 | * computed using hard walltime. This seems to be the more | ||
585 | * natural solution, but the full ramifications of this | ||
586 | * approach need further study. | ||
587 | */ | ||
588 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; | ||
589 | if (unlikely((s64)delta_exec < 0)) | ||
590 | delta_exec = 0; | ||
591 | |||
592 | schedstat_set(curr->se.statistics.exec_max, | ||
593 | max(curr->se.statistics.exec_max, delta_exec)); | ||
594 | |||
595 | curr->se.sum_exec_runtime += delta_exec; | ||
596 | account_group_exec_runtime(curr, delta_exec); | ||
597 | |||
598 | curr->se.exec_start = rq_clock_task(rq); | ||
599 | cpuacct_charge(curr, delta_exec); | ||
600 | |||
601 | sched_rt_avg_update(rq, delta_exec); | ||
602 | |||
603 | dl_se->runtime -= delta_exec; | ||
604 | if (dl_runtime_exceeded(rq, dl_se)) { | ||
605 | __dequeue_task_dl(rq, curr, 0); | ||
606 | if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted))) | ||
607 | dl_se->dl_throttled = 1; | ||
608 | else | ||
609 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); | ||
610 | |||
611 | if (!is_leftmost(curr, &rq->dl)) | ||
612 | resched_task(curr); | ||
613 | } | ||
614 | |||
615 | /* | ||
616 | * Because -- for now -- we share the rt bandwidth, we need to | ||
617 | * account our runtime there too, otherwise actual rt tasks | ||
618 | * would be able to exceed the shared quota. | ||
619 | * | ||
620 | * Account to the root rt group for now. | ||
621 | * | ||
622 | * The solution we're working towards is having the RT groups scheduled | ||
623 | * using deadline servers -- however there are a few nasties to figure | ||
624 | * out before that can happen. | ||
625 | */ | ||
626 | if (rt_bandwidth_enabled()) { | ||
627 | struct rt_rq *rt_rq = &rq->rt; | ||
628 | |||
629 | raw_spin_lock(&rt_rq->rt_runtime_lock); | ||
630 | /* | ||
631 | * We'll let actual RT tasks worry about the overflow here, we | ||
632 | * have our own CBS to keep us in line; only account when RT | ||
633 | * bandwidth is relevant. | ||
634 | */ | ||
635 | if (sched_rt_bandwidth_account(rt_rq)) | ||
636 | rt_rq->rt_time += delta_exec; | ||
637 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | ||
638 | } | ||
639 | } | ||
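The accounting above boils down to: charge the consumed CPU time against the CBS budget and, once the budget is exhausted or the deadline has been missed, dequeue the task and let the bandwidth timer bring it back. A minimal sketch of that decision, using an invented fake_dl_se type (deliberately not the kernel's sched_dl_entity):

/* Minimal sketch of the budget charge / throttle decision; not kernel code. */
#include <stdio.h>

struct fake_dl_se {
	long long runtime;		/* remaining budget (ns)  */
	unsigned long long deadline;	/* absolute deadline (ns) */
};

/* Charge delta_exec ns of CPU time; return 1 if the entity must throttle. */
static int charge(struct fake_dl_se *se, unsigned long long now,
		  unsigned long long delta_exec)
{
	se->runtime -= delta_exec;
	if (se->runtime <= 0 || now >= se->deadline)
		return 1;	/* dequeue and wait for the replenishment timer */
	return 0;
}

int main(void)
{
	struct fake_dl_se se = { .runtime = 3000, .deadline = 100000 };

	printf("%d\n", charge(&se, 50000, 2000));	/* 0: 1000ns of budget left */
	printf("%d\n", charge(&se, 52000, 2000));	/* 1: budget exhausted      */
	return 0;
}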
640 | |||
641 | #ifdef CONFIG_SMP | ||
642 | |||
643 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu); | ||
644 | |||
645 | static inline u64 next_deadline(struct rq *rq) | ||
646 | { | ||
647 | struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu); | ||
648 | |||
649 | if (next && dl_prio(next->prio)) | ||
650 | return next->dl.deadline; | ||
651 | else | ||
652 | return 0; | ||
653 | } | ||
654 | |||
655 | static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | ||
656 | { | ||
657 | struct rq *rq = rq_of_dl_rq(dl_rq); | ||
658 | |||
659 | if (dl_rq->earliest_dl.curr == 0 || | ||
660 | dl_time_before(deadline, dl_rq->earliest_dl.curr)) { | ||
661 | /* | ||
662 | * If the dl_rq had no -deadline tasks, or if the new task | ||
663 | * has shorter deadline than the current one on dl_rq, we | ||
664 | * know that the previous earliest becomes our next earliest, | ||
665 | * as the new task becomes the earliest itself. | ||
666 | */ | ||
667 | dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr; | ||
668 | dl_rq->earliest_dl.curr = deadline; | ||
669 | cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1); | ||
670 | } else if (dl_rq->earliest_dl.next == 0 || | ||
671 | dl_time_before(deadline, dl_rq->earliest_dl.next)) { | ||
672 | /* | ||
673 | * On the other hand, if the new -deadline task has | ||
674 | * a later deadline than the earliest one on dl_rq, but | ||
675 | * it is earlier than the next (if any), we must | ||
676 | * recompute the next-earliest. | ||
677 | */ | ||
678 | dl_rq->earliest_dl.next = next_deadline(rq); | ||
679 | } | ||
680 | } | ||
681 | |||
682 | static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | ||
683 | { | ||
684 | struct rq *rq = rq_of_dl_rq(dl_rq); | ||
685 | |||
686 | /* | ||
687 | * Since we may have removed our earliest (and/or next earliest) | ||
688 | * task we must recompute them. | ||
689 | */ | ||
690 | if (!dl_rq->dl_nr_running) { | ||
691 | dl_rq->earliest_dl.curr = 0; | ||
692 | dl_rq->earliest_dl.next = 0; | ||
693 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); | ||
694 | } else { | ||
695 | struct rb_node *leftmost = dl_rq->rb_leftmost; | ||
696 | struct sched_dl_entity *entry; | ||
697 | |||
698 | entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); | ||
699 | dl_rq->earliest_dl.curr = entry->deadline; | ||
700 | dl_rq->earliest_dl.next = next_deadline(rq); | ||
701 | cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1); | ||
702 | } | ||
703 | } | ||
704 | |||
705 | #else | ||
706 | |||
707 | static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | ||
708 | static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | ||
709 | |||
710 | #endif /* CONFIG_SMP */ | ||
711 | |||
712 | static inline | ||
713 | void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | ||
714 | { | ||
715 | int prio = dl_task_of(dl_se)->prio; | ||
716 | u64 deadline = dl_se->deadline; | ||
717 | |||
718 | WARN_ON(!dl_prio(prio)); | ||
719 | dl_rq->dl_nr_running++; | ||
720 | inc_nr_running(rq_of_dl_rq(dl_rq)); | ||
721 | |||
722 | inc_dl_deadline(dl_rq, deadline); | ||
723 | inc_dl_migration(dl_se, dl_rq); | ||
724 | } | ||
725 | |||
726 | static inline | ||
727 | void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | ||
728 | { | ||
729 | int prio = dl_task_of(dl_se)->prio; | ||
730 | |||
731 | WARN_ON(!dl_prio(prio)); | ||
732 | WARN_ON(!dl_rq->dl_nr_running); | ||
733 | dl_rq->dl_nr_running--; | ||
734 | dec_nr_running(rq_of_dl_rq(dl_rq)); | ||
735 | |||
736 | dec_dl_deadline(dl_rq, dl_se->deadline); | ||
737 | dec_dl_migration(dl_se, dl_rq); | ||
738 | } | ||
739 | |||
740 | static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) | ||
741 | { | ||
742 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | ||
743 | struct rb_node **link = &dl_rq->rb_root.rb_node; | ||
744 | struct rb_node *parent = NULL; | ||
745 | struct sched_dl_entity *entry; | ||
746 | int leftmost = 1; | ||
747 | |||
748 | BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); | ||
749 | |||
750 | while (*link) { | ||
751 | parent = *link; | ||
752 | entry = rb_entry(parent, struct sched_dl_entity, rb_node); | ||
753 | if (dl_time_before(dl_se->deadline, entry->deadline)) | ||
754 | link = &parent->rb_left; | ||
755 | else { | ||
756 | link = &parent->rb_right; | ||
757 | leftmost = 0; | ||
758 | } | ||
759 | } | ||
760 | |||
761 | if (leftmost) | ||
762 | dl_rq->rb_leftmost = &dl_se->rb_node; | ||
763 | |||
764 | rb_link_node(&dl_se->rb_node, parent, link); | ||
765 | rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root); | ||
766 | |||
767 | inc_dl_tasks(dl_se, dl_rq); | ||
768 | } | ||
769 | |||
770 | static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) | ||
771 | { | ||
772 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | ||
773 | |||
774 | if (RB_EMPTY_NODE(&dl_se->rb_node)) | ||
775 | return; | ||
776 | |||
777 | if (dl_rq->rb_leftmost == &dl_se->rb_node) { | ||
778 | struct rb_node *next_node; | ||
779 | |||
780 | next_node = rb_next(&dl_se->rb_node); | ||
781 | dl_rq->rb_leftmost = next_node; | ||
782 | } | ||
783 | |||
784 | rb_erase(&dl_se->rb_node, &dl_rq->rb_root); | ||
785 | RB_CLEAR_NODE(&dl_se->rb_node); | ||
786 | |||
787 | dec_dl_tasks(dl_se, dl_rq); | ||
788 | } | ||
789 | |||
790 | static void | ||
791 | enqueue_dl_entity(struct sched_dl_entity *dl_se, | ||
792 | struct sched_dl_entity *pi_se, int flags) | ||
793 | { | ||
794 | BUG_ON(on_dl_rq(dl_se)); | ||
795 | |||
796 | /* | ||
797 | * If this is a wakeup or a new instance, the scheduling | ||
798 | * parameters of the task might need updating. Otherwise, | ||
799 | * we want a replenishment of its runtime. | ||
800 | */ | ||
801 | if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) | ||
802 | replenish_dl_entity(dl_se, pi_se); | ||
803 | else | ||
804 | update_dl_entity(dl_se, pi_se); | ||
805 | |||
806 | __enqueue_dl_entity(dl_se); | ||
807 | } | ||
808 | |||
809 | static void dequeue_dl_entity(struct sched_dl_entity *dl_se) | ||
810 | { | ||
811 | __dequeue_dl_entity(dl_se); | ||
812 | } | ||
813 | |||
814 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | ||
815 | { | ||
816 | struct task_struct *pi_task = rt_mutex_get_top_task(p); | ||
817 | struct sched_dl_entity *pi_se = &p->dl; | ||
818 | |||
819 | /* | ||
820 | * Use the scheduling parameters of the top pi-waiter | ||
821 | * task if we have one and its (relative) deadline is | ||
822 | * smaller than ours; otherwise we keep our runtime and | ||
823 | * deadline. | ||
824 | */ | ||
825 | if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) | ||
826 | pi_se = &pi_task->dl; | ||
827 | |||
828 | /* | ||
829 | * If p is throttled, we do nothing. In fact, if it exhausted | ||
830 | * its budget it needs a replenishment and, since it now is on | ||
831 | * its rq, the bandwidth timer callback (which clearly has not | ||
832 | * run yet) will take care of this. | ||
833 | */ | ||
834 | if (p->dl.dl_throttled) | ||
835 | return; | ||
836 | |||
837 | enqueue_dl_entity(&p->dl, pi_se, flags); | ||
838 | |||
839 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | ||
840 | enqueue_pushable_dl_task(rq, p); | ||
841 | } | ||
842 | |||
843 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | ||
844 | { | ||
845 | dequeue_dl_entity(&p->dl); | ||
846 | dequeue_pushable_dl_task(rq, p); | ||
847 | } | ||
848 | |||
849 | static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | ||
850 | { | ||
851 | update_curr_dl(rq); | ||
852 | __dequeue_task_dl(rq, p, flags); | ||
853 | } | ||
854 | |||
855 | /* | ||
856 | * Yield task semantic for -deadline tasks is: | ||
857 | * | ||
858 | * get off the CPU until our next instance, with | ||
859 | * a new runtime. This is of little use now, since we | ||
860 | * don't have a bandwidth reclaiming mechanism. Anyway, | ||
861 | * bandwidth reclaiming is planned for the future, and | ||
862 | * yield_task_dl will indicate that some spare budget | ||
863 | * is available for other task instances to use. | ||
864 | */ | ||
865 | static void yield_task_dl(struct rq *rq) | ||
866 | { | ||
867 | struct task_struct *p = rq->curr; | ||
868 | |||
869 | /* | ||
870 | * We make the task go to sleep until its current deadline by | ||
871 | * forcing its runtime to zero. This way, update_curr_dl() stops | ||
872 | * it and the bandwidth timer will wake it up and will give it | ||
873 | * new scheduling parameters (thanks to dl_new=1). | ||
874 | */ | ||
875 | if (p->dl.runtime > 0) { | ||
876 | rq->curr->dl.dl_new = 1; | ||
877 | p->dl.runtime = 0; | ||
878 | } | ||
879 | update_curr_dl(rq); | ||
880 | } | ||
881 | |||
882 | #ifdef CONFIG_SMP | ||
883 | |||
884 | static int find_later_rq(struct task_struct *task); | ||
885 | |||
886 | static int | ||
887 | select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | ||
888 | { | ||
889 | struct task_struct *curr; | ||
890 | struct rq *rq; | ||
891 | |||
892 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) | ||
893 | goto out; | ||
894 | |||
895 | rq = cpu_rq(cpu); | ||
896 | |||
897 | rcu_read_lock(); | ||
898 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | ||
899 | |||
900 | /* | ||
901 | * If we are dealing with a -deadline task, we must | ||
902 | * decide where to wake it up. | ||
903 | * If it has a later deadline and the current task | ||
904 | * on this rq can't move (provided the waking task | ||
905 | * can!) we prefer to send it somewhere else. On the | ||
906 | * other hand, if it has a shorter deadline, we | ||
907 | * try to make it stay here, it might be important. | ||
908 | */ | ||
909 | if (unlikely(dl_task(curr)) && | ||
910 | (curr->nr_cpus_allowed < 2 || | ||
911 | !dl_entity_preempt(&p->dl, &curr->dl)) && | ||
912 | (p->nr_cpus_allowed > 1)) { | ||
913 | int target = find_later_rq(p); | ||
914 | |||
915 | if (target != -1) | ||
916 | cpu = target; | ||
917 | } | ||
918 | rcu_read_unlock(); | ||
919 | |||
920 | out: | ||
921 | return cpu; | ||
922 | } | ||
923 | |||
924 | static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | ||
925 | { | ||
926 | /* | ||
927 | * Current can't be migrated, useless to reschedule, | ||
928 | * let's hope p can move out. | ||
929 | */ | ||
930 | if (rq->curr->nr_cpus_allowed == 1 || | ||
931 | cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) | ||
932 | return; | ||
933 | |||
934 | /* | ||
935 | * p is migratable, so let's not schedule it and | ||
936 | * see if it is pushed or pulled somewhere else. | ||
937 | */ | ||
938 | if (p->nr_cpus_allowed != 1 && | ||
939 | cpudl_find(&rq->rd->cpudl, p, NULL) != -1) | ||
940 | return; | ||
941 | |||
942 | resched_task(rq->curr); | ||
943 | } | ||
944 | |||
945 | #endif /* CONFIG_SMP */ | ||
946 | |||
947 | /* | ||
948 | * Only called when both the current and waking task are -deadline | ||
949 | * tasks. | ||
950 | */ | ||
951 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | ||
952 | int flags) | ||
953 | { | ||
954 | if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { | ||
955 | resched_task(rq->curr); | ||
956 | return; | ||
957 | } | ||
958 | |||
959 | #ifdef CONFIG_SMP | ||
960 | /* | ||
961 | * In the unlikely case current and p have the same deadline | ||
962 | * let us try to decide what's the best thing to do... | ||
963 | */ | ||
964 | if ((p->dl.deadline == rq->curr->dl.deadline) && | ||
965 | !test_tsk_need_resched(rq->curr)) | ||
966 | check_preempt_equal_dl(rq, p); | ||
967 | #endif /* CONFIG_SMP */ | ||
968 | } | ||
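The check_preempt_curr_dl() decision above reduces to "earlier absolute deadline wins". Deadline comparisons throughout this file need to be wraparound-safe, which is achieved by looking at the sign of the 64-bit difference; a standalone sketch in the spirit of dl_time_before():

/* Wraparound-safe "earlier deadline" test, in the spirit of dl_time_before(). */
#include <stdio.h>
#include <stdint.h>

static int earlier(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;	/* true if a precedes b, even across wrap */
}

int main(void)
{
	printf("%d\n", earlier(100, 200));		/* 1 */
	printf("%d\n", earlier(UINT64_MAX - 5, 10));	/* 1: just before the wrap */
	printf("%d\n", earlier(10, UINT64_MAX - 5));	/* 0 */
	return 0;
}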
969 | |||
970 | #ifdef CONFIG_SCHED_HRTICK | ||
971 | static void start_hrtick_dl(struct rq *rq, struct task_struct *p) | ||
972 | { | ||
973 | s64 delta = p->dl.dl_runtime - p->dl.runtime; | ||
974 | |||
975 | if (delta > 10000) | ||
976 | hrtick_start(rq, p->dl.runtime); | ||
977 | } | ||
978 | #endif | ||
979 | |||
980 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, | ||
981 | struct dl_rq *dl_rq) | ||
982 | { | ||
983 | struct rb_node *left = dl_rq->rb_leftmost; | ||
984 | |||
985 | if (!left) | ||
986 | return NULL; | ||
987 | |||
988 | return rb_entry(left, struct sched_dl_entity, rb_node); | ||
989 | } | ||
990 | |||
991 | struct task_struct *pick_next_task_dl(struct rq *rq) | ||
992 | { | ||
993 | struct sched_dl_entity *dl_se; | ||
994 | struct task_struct *p; | ||
995 | struct dl_rq *dl_rq; | ||
996 | |||
997 | dl_rq = &rq->dl; | ||
998 | |||
999 | if (unlikely(!dl_rq->dl_nr_running)) | ||
1000 | return NULL; | ||
1001 | |||
1002 | dl_se = pick_next_dl_entity(rq, dl_rq); | ||
1003 | BUG_ON(!dl_se); | ||
1004 | |||
1005 | p = dl_task_of(dl_se); | ||
1006 | p->se.exec_start = rq_clock_task(rq); | ||
1007 | |||
1008 | /* Running task will never be pushed. */ | ||
1009 | dequeue_pushable_dl_task(rq, p); | ||
1010 | |||
1011 | #ifdef CONFIG_SCHED_HRTICK | ||
1012 | if (hrtick_enabled(rq)) | ||
1013 | start_hrtick_dl(rq, p); | ||
1014 | #endif | ||
1015 | |||
1016 | #ifdef CONFIG_SMP | ||
1017 | rq->post_schedule = has_pushable_dl_tasks(rq); | ||
1018 | #endif /* CONFIG_SMP */ | ||
1019 | |||
1020 | return p; | ||
1021 | } | ||
1022 | |||
1023 | static void put_prev_task_dl(struct rq *rq, struct task_struct *p) | ||
1024 | { | ||
1025 | update_curr_dl(rq); | ||
1026 | |||
1027 | if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) | ||
1028 | enqueue_pushable_dl_task(rq, p); | ||
1029 | } | ||
1030 | |||
1031 | static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) | ||
1032 | { | ||
1033 | update_curr_dl(rq); | ||
1034 | |||
1035 | #ifdef CONFIG_SCHED_HRTICK | ||
1036 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) | ||
1037 | start_hrtick_dl(rq, p); | ||
1038 | #endif | ||
1039 | } | ||
1040 | |||
1041 | static void task_fork_dl(struct task_struct *p) | ||
1042 | { | ||
1043 | /* | ||
1044 | * SCHED_DEADLINE tasks cannot fork and this is achieved through | ||
1045 | * sched_fork() | ||
1046 | */ | ||
1047 | } | ||
1048 | |||
1049 | static void task_dead_dl(struct task_struct *p) | ||
1050 | { | ||
1051 | struct hrtimer *timer = &p->dl.dl_timer; | ||
1052 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); | ||
1053 | |||
1054 | /* | ||
1055 | * Since we are TASK_DEAD we won't slip out of the domain! | ||
1056 | */ | ||
1057 | raw_spin_lock_irq(&dl_b->lock); | ||
1058 | dl_b->total_bw -= p->dl.dl_bw; | ||
1059 | raw_spin_unlock_irq(&dl_b->lock); | ||
1060 | |||
1061 | hrtimer_cancel(timer); | ||
1062 | } | ||
1063 | |||
1064 | static void set_curr_task_dl(struct rq *rq) | ||
1065 | { | ||
1066 | struct task_struct *p = rq->curr; | ||
1067 | |||
1068 | p->se.exec_start = rq_clock_task(rq); | ||
1069 | |||
1070 | /* You can't push away the running task */ | ||
1071 | dequeue_pushable_dl_task(rq, p); | ||
1072 | } | ||
1073 | |||
1074 | #ifdef CONFIG_SMP | ||
1075 | |||
1076 | /* Only try algorithms three times */ | ||
1077 | #define DL_MAX_TRIES 3 | ||
1078 | |||
1079 | static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) | ||
1080 | { | ||
1081 | if (!task_running(rq, p) && | ||
1082 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && | ||
1083 | (p->nr_cpus_allowed > 1)) | ||
1084 | return 1; | ||
1085 | |||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | /* Returns the second earliest -deadline task, NULL otherwise */ | ||
1090 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu) | ||
1091 | { | ||
1092 | struct rb_node *next_node = rq->dl.rb_leftmost; | ||
1093 | struct sched_dl_entity *dl_se; | ||
1094 | struct task_struct *p = NULL; | ||
1095 | |||
1096 | next_node: | ||
1097 | next_node = rb_next(next_node); | ||
1098 | if (next_node) { | ||
1099 | dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node); | ||
1100 | p = dl_task_of(dl_se); | ||
1101 | |||
1102 | if (pick_dl_task(rq, p, cpu)) | ||
1103 | return p; | ||
1104 | |||
1105 | goto next_node; | ||
1106 | } | ||
1107 | |||
1108 | return NULL; | ||
1109 | } | ||
1110 | |||
1111 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); | ||
1112 | |||
1113 | static int find_later_rq(struct task_struct *task) | ||
1114 | { | ||
1115 | struct sched_domain *sd; | ||
1116 | struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); | ||
1117 | int this_cpu = smp_processor_id(); | ||
1118 | int best_cpu, cpu = task_cpu(task); | ||
1119 | |||
1120 | /* Make sure the mask is initialized first */ | ||
1121 | if (unlikely(!later_mask)) | ||
1122 | return -1; | ||
1123 | |||
1124 | if (task->nr_cpus_allowed == 1) | ||
1125 | return -1; | ||
1126 | |||
1127 | best_cpu = cpudl_find(&task_rq(task)->rd->cpudl, | ||
1128 | task, later_mask); | ||
1129 | if (best_cpu == -1) | ||
1130 | return -1; | ||
1131 | |||
1132 | /* | ||
1133 | * If we are here, some target has been found, | ||
1134 | * the most suitable of which is cached in best_cpu. | ||
1135 | * This is, among the runqueues where the current tasks | ||
1136 | * have later deadlines than the task's one, the rq | ||
1137 | * with the latest possible one. | ||
1138 | * | ||
1139 | * Now we check how well this matches with task's | ||
1140 | * affinity and system topology. | ||
1141 | * | ||
1142 | * The last cpu where the task ran is our first | ||
1143 | * guess, since it is most likely cache-hot there. | ||
1144 | */ | ||
1145 | if (cpumask_test_cpu(cpu, later_mask)) | ||
1146 | return cpu; | ||
1147 | /* | ||
1148 | * Check if this_cpu is to be skipped (i.e., it is | ||
1149 | * not in the mask) or not. | ||
1150 | */ | ||
1151 | if (!cpumask_test_cpu(this_cpu, later_mask)) | ||
1152 | this_cpu = -1; | ||
1153 | |||
1154 | rcu_read_lock(); | ||
1155 | for_each_domain(cpu, sd) { | ||
1156 | if (sd->flags & SD_WAKE_AFFINE) { | ||
1157 | |||
1158 | /* | ||
1159 | * If possible, preempting this_cpu is | ||
1160 | * cheaper than migrating. | ||
1161 | */ | ||
1162 | if (this_cpu != -1 && | ||
1163 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { | ||
1164 | rcu_read_unlock(); | ||
1165 | return this_cpu; | ||
1166 | } | ||
1167 | |||
1168 | /* | ||
1169 | * Last chance: if best_cpu is valid and is | ||
1170 | * in the mask, that becomes our choice. | ||
1171 | */ | ||
1172 | if (best_cpu < nr_cpu_ids && | ||
1173 | cpumask_test_cpu(best_cpu, sched_domain_span(sd))) { | ||
1174 | rcu_read_unlock(); | ||
1175 | return best_cpu; | ||
1176 | } | ||
1177 | } | ||
1178 | } | ||
1179 | rcu_read_unlock(); | ||
1180 | |||
1181 | /* | ||
1182 | * At this point, all our guesses failed, we just return | ||
1183 | * 'something', and let the caller sort things out. | ||
1184 | */ | ||
1185 | if (this_cpu != -1) | ||
1186 | return this_cpu; | ||
1187 | |||
1188 | cpu = cpumask_any(later_mask); | ||
1189 | if (cpu < nr_cpu_ids) | ||
1190 | return cpu; | ||
1191 | |||
1192 | return -1; | ||
1193 | } | ||
1194 | |||
1195 | /* Locks the rq it finds */ | ||
1196 | static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) | ||
1197 | { | ||
1198 | struct rq *later_rq = NULL; | ||
1199 | int tries; | ||
1200 | int cpu; | ||
1201 | |||
1202 | for (tries = 0; tries < DL_MAX_TRIES; tries++) { | ||
1203 | cpu = find_later_rq(task); | ||
1204 | |||
1205 | if ((cpu == -1) || (cpu == rq->cpu)) | ||
1206 | break; | ||
1207 | |||
1208 | later_rq = cpu_rq(cpu); | ||
1209 | |||
1210 | /* Retry if something changed. */ | ||
1211 | if (double_lock_balance(rq, later_rq)) { | ||
1212 | if (unlikely(task_rq(task) != rq || | ||
1213 | !cpumask_test_cpu(later_rq->cpu, | ||
1214 | &task->cpus_allowed) || | ||
1215 | task_running(rq, task) || !task->on_rq)) { | ||
1216 | double_unlock_balance(rq, later_rq); | ||
1217 | later_rq = NULL; | ||
1218 | break; | ||
1219 | } | ||
1220 | } | ||
1221 | |||
1222 | /* | ||
1223 | * If the rq we found has no -deadline task, or | ||
1224 | * its earliest one has a later deadline than our | ||
1225 | * task, the rq is a good one. | ||
1226 | */ | ||
1227 | if (!later_rq->dl.dl_nr_running || | ||
1228 | dl_time_before(task->dl.deadline, | ||
1229 | later_rq->dl.earliest_dl.curr)) | ||
1230 | break; | ||
1231 | |||
1232 | /* Otherwise we try again. */ | ||
1233 | double_unlock_balance(rq, later_rq); | ||
1234 | later_rq = NULL; | ||
1235 | } | ||
1236 | |||
1237 | return later_rq; | ||
1238 | } | ||
1239 | |||
1240 | static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) | ||
1241 | { | ||
1242 | struct task_struct *p; | ||
1243 | |||
1244 | if (!has_pushable_dl_tasks(rq)) | ||
1245 | return NULL; | ||
1246 | |||
1247 | p = rb_entry(rq->dl.pushable_dl_tasks_leftmost, | ||
1248 | struct task_struct, pushable_dl_tasks); | ||
1249 | |||
1250 | BUG_ON(rq->cpu != task_cpu(p)); | ||
1251 | BUG_ON(task_current(rq, p)); | ||
1252 | BUG_ON(p->nr_cpus_allowed <= 1); | ||
1253 | |||
1254 | BUG_ON(!p->on_rq); | ||
1255 | BUG_ON(!dl_task(p)); | ||
1256 | |||
1257 | return p; | ||
1258 | } | ||
1259 | |||
1260 | /* | ||
1261 | * See if the non-running -deadline tasks on this rq | ||
1262 | * can be sent to some other CPU where they can preempt | ||
1263 | * and start executing. | ||
1264 | */ | ||
1265 | static int push_dl_task(struct rq *rq) | ||
1266 | { | ||
1267 | struct task_struct *next_task; | ||
1268 | struct rq *later_rq; | ||
1269 | |||
1270 | if (!rq->dl.overloaded) | ||
1271 | return 0; | ||
1272 | |||
1273 | next_task = pick_next_pushable_dl_task(rq); | ||
1274 | if (!next_task) | ||
1275 | return 0; | ||
1276 | |||
1277 | retry: | ||
1278 | if (unlikely(next_task == rq->curr)) { | ||
1279 | WARN_ON(1); | ||
1280 | return 0; | ||
1281 | } | ||
1282 | |||
1283 | /* | ||
1284 | * If next_task preempts rq->curr, and rq->curr | ||
1285 | * can move away, it makes sense to just reschedule | ||
1286 | * without going further in pushing next_task. | ||
1287 | */ | ||
1288 | if (dl_task(rq->curr) && | ||
1289 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && | ||
1290 | rq->curr->nr_cpus_allowed > 1) { | ||
1291 | resched_task(rq->curr); | ||
1292 | return 0; | ||
1293 | } | ||
1294 | |||
1295 | /* We might release rq lock */ | ||
1296 | get_task_struct(next_task); | ||
1297 | |||
1298 | /* Will lock the rq it'll find */ | ||
1299 | later_rq = find_lock_later_rq(next_task, rq); | ||
1300 | if (!later_rq) { | ||
1301 | struct task_struct *task; | ||
1302 | |||
1303 | /* | ||
1304 | * We must check all this again, since | ||
1305 | * find_lock_later_rq releases rq->lock and it is | ||
1306 | * then possible that next_task has migrated. | ||
1307 | */ | ||
1308 | task = pick_next_pushable_dl_task(rq); | ||
1309 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | ||
1310 | /* | ||
1311 | * The task is still there. We don't try | ||
1312 | * again, some other cpu will pull it when ready. | ||
1313 | */ | ||
1314 | dequeue_pushable_dl_task(rq, next_task); | ||
1315 | goto out; | ||
1316 | } | ||
1317 | |||
1318 | if (!task) | ||
1319 | /* No more tasks */ | ||
1320 | goto out; | ||
1321 | |||
1322 | put_task_struct(next_task); | ||
1323 | next_task = task; | ||
1324 | goto retry; | ||
1325 | } | ||
1326 | |||
1327 | deactivate_task(rq, next_task, 0); | ||
1328 | set_task_cpu(next_task, later_rq->cpu); | ||
1329 | activate_task(later_rq, next_task, 0); | ||
1330 | |||
1331 | resched_task(later_rq->curr); | ||
1332 | |||
1333 | double_unlock_balance(rq, later_rq); | ||
1334 | |||
1335 | out: | ||
1336 | put_task_struct(next_task); | ||
1337 | |||
1338 | return 1; | ||
1339 | } | ||
1340 | |||
1341 | static void push_dl_tasks(struct rq *rq) | ||
1342 | { | ||
1343 | /* Terminates as it moves a -deadline task */ | ||
1344 | while (push_dl_task(rq)) | ||
1345 | ; | ||
1346 | } | ||
1347 | |||
1348 | static int pull_dl_task(struct rq *this_rq) | ||
1349 | { | ||
1350 | int this_cpu = this_rq->cpu, ret = 0, cpu; | ||
1351 | struct task_struct *p; | ||
1352 | struct rq *src_rq; | ||
1353 | u64 dmin = LONG_MAX; | ||
1354 | |||
1355 | if (likely(!dl_overloaded(this_rq))) | ||
1356 | return 0; | ||
1357 | |||
1358 | /* | ||
1359 | * Match the barrier from dl_set_overload(); this guarantees that if we | ||
1360 | * see overloaded we must also see the dlo_mask bit. | ||
1361 | */ | ||
1362 | smp_rmb(); | ||
1363 | |||
1364 | for_each_cpu(cpu, this_rq->rd->dlo_mask) { | ||
1365 | if (this_cpu == cpu) | ||
1366 | continue; | ||
1367 | |||
1368 | src_rq = cpu_rq(cpu); | ||
1369 | |||
1370 | /* | ||
1371 | * It looks racy, and it is! However, as in sched_rt.c, | ||
1372 | * we are fine with this. | ||
1373 | */ | ||
1374 | if (this_rq->dl.dl_nr_running && | ||
1375 | dl_time_before(this_rq->dl.earliest_dl.curr, | ||
1376 | src_rq->dl.earliest_dl.next)) | ||
1377 | continue; | ||
1378 | |||
1379 | /* Might drop this_rq->lock */ | ||
1380 | double_lock_balance(this_rq, src_rq); | ||
1381 | |||
1382 | /* | ||
1383 | * If there are no more pullable tasks on the | ||
1384 | * rq, we're done with it. | ||
1385 | */ | ||
1386 | if (src_rq->dl.dl_nr_running <= 1) | ||
1387 | goto skip; | ||
1388 | |||
1389 | p = pick_next_earliest_dl_task(src_rq, this_cpu); | ||
1390 | |||
1391 | /* | ||
1392 | * We found a task to be pulled if: | ||
1393 | * - it preempts our current (if there's one), | ||
1394 | * - it will preempt the last one we pulled (if any). | ||
1395 | */ | ||
1396 | if (p && dl_time_before(p->dl.deadline, dmin) && | ||
1397 | (!this_rq->dl.dl_nr_running || | ||
1398 | dl_time_before(p->dl.deadline, | ||
1399 | this_rq->dl.earliest_dl.curr))) { | ||
1400 | WARN_ON(p == src_rq->curr); | ||
1401 | WARN_ON(!p->on_rq); | ||
1402 | |||
1403 | /* | ||
1404 | * Then we pull iff p has actually an earlier | ||
1405 | * deadline than the current task of its runqueue. | ||
1406 | */ | ||
1407 | if (dl_time_before(p->dl.deadline, | ||
1408 | src_rq->curr->dl.deadline)) | ||
1409 | goto skip; | ||
1410 | |||
1411 | ret = 1; | ||
1412 | |||
1413 | deactivate_task(src_rq, p, 0); | ||
1414 | set_task_cpu(p, this_cpu); | ||
1415 | activate_task(this_rq, p, 0); | ||
1416 | dmin = p->dl.deadline; | ||
1417 | |||
1418 | /* Is there any other task even earlier? */ | ||
1419 | } | ||
1420 | skip: | ||
1421 | double_unlock_balance(this_rq, src_rq); | ||
1422 | } | ||
1423 | |||
1424 | return ret; | ||
1425 | } | ||
1426 | |||
1427 | static void pre_schedule_dl(struct rq *rq, struct task_struct *prev) | ||
1428 | { | ||
1429 | /* Try to pull other tasks here */ | ||
1430 | if (dl_task(prev)) | ||
1431 | pull_dl_task(rq); | ||
1432 | } | ||
1433 | |||
1434 | static void post_schedule_dl(struct rq *rq) | ||
1435 | { | ||
1436 | push_dl_tasks(rq); | ||
1437 | } | ||
1438 | |||
1439 | /* | ||
1440 | * Since the task is not running and a reschedule is not going to happen | ||
1441 | * anytime soon on its runqueue, we try pushing it away now. | ||
1442 | */ | ||
1443 | static void task_woken_dl(struct rq *rq, struct task_struct *p) | ||
1444 | { | ||
1445 | if (!task_running(rq, p) && | ||
1446 | !test_tsk_need_resched(rq->curr) && | ||
1447 | has_pushable_dl_tasks(rq) && | ||
1448 | p->nr_cpus_allowed > 1 && | ||
1449 | dl_task(rq->curr) && | ||
1450 | (rq->curr->nr_cpus_allowed < 2 || | ||
1451 | dl_entity_preempt(&rq->curr->dl, &p->dl))) { | ||
1452 | push_dl_tasks(rq); | ||
1453 | } | ||
1454 | } | ||
1455 | |||
1456 | static void set_cpus_allowed_dl(struct task_struct *p, | ||
1457 | const struct cpumask *new_mask) | ||
1458 | { | ||
1459 | struct rq *rq; | ||
1460 | int weight; | ||
1461 | |||
1462 | BUG_ON(!dl_task(p)); | ||
1463 | |||
1464 | /* | ||
1465 | * Update only if the task is actually running (i.e., | ||
1466 | * it is on the rq AND it is not throttled). | ||
1467 | */ | ||
1468 | if (!on_dl_rq(&p->dl)) | ||
1469 | return; | ||
1470 | |||
1471 | weight = cpumask_weight(new_mask); | ||
1472 | |||
1473 | /* | ||
1474 | * Only update if the process changes its migratability state, | ||
1475 | * i.e., whether it can migrate or not. | ||
1476 | */ | ||
1477 | if ((p->nr_cpus_allowed > 1) == (weight > 1)) | ||
1478 | return; | ||
1479 | |||
1480 | rq = task_rq(p); | ||
1481 | |||
1482 | /* | ||
1483 | * The process used to be able to migrate OR it can now migrate | ||
1484 | */ | ||
1485 | if (weight <= 1) { | ||
1486 | if (!task_current(rq, p)) | ||
1487 | dequeue_pushable_dl_task(rq, p); | ||
1488 | BUG_ON(!rq->dl.dl_nr_migratory); | ||
1489 | rq->dl.dl_nr_migratory--; | ||
1490 | } else { | ||
1491 | if (!task_current(rq, p)) | ||
1492 | enqueue_pushable_dl_task(rq, p); | ||
1493 | rq->dl.dl_nr_migratory++; | ||
1494 | } | ||
1495 | |||
1496 | update_dl_migration(&rq->dl); | ||
1497 | } | ||
1498 | |||
1499 | /* Assumes rq->lock is held */ | ||
1500 | static void rq_online_dl(struct rq *rq) | ||
1501 | { | ||
1502 | if (rq->dl.overloaded) | ||
1503 | dl_set_overload(rq); | ||
1504 | |||
1505 | if (rq->dl.dl_nr_running > 0) | ||
1506 | cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1); | ||
1507 | } | ||
1508 | |||
1509 | /* Assumes rq->lock is held */ | ||
1510 | static void rq_offline_dl(struct rq *rq) | ||
1511 | { | ||
1512 | if (rq->dl.overloaded) | ||
1513 | dl_clear_overload(rq); | ||
1514 | |||
1515 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); | ||
1516 | } | ||
1517 | |||
1518 | void init_sched_dl_class(void) | ||
1519 | { | ||
1520 | unsigned int i; | ||
1521 | |||
1522 | for_each_possible_cpu(i) | ||
1523 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i), | ||
1524 | GFP_KERNEL, cpu_to_node(i)); | ||
1525 | } | ||
1526 | |||
1527 | #endif /* CONFIG_SMP */ | ||
1528 | |||
1529 | static void switched_from_dl(struct rq *rq, struct task_struct *p) | ||
1530 | { | ||
1531 | if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy)) | ||
1532 | hrtimer_try_to_cancel(&p->dl.dl_timer); | ||
1533 | |||
1534 | #ifdef CONFIG_SMP | ||
1535 | /* | ||
1536 | * Since this might be the only -deadline task on the rq, | ||
1537 | * this is the right place to try to pull some other one | ||
1538 | * from an overloaded cpu, if any. | ||
1539 | */ | ||
1540 | if (!rq->dl.dl_nr_running) | ||
1541 | pull_dl_task(rq); | ||
1542 | #endif | ||
1543 | } | ||
1544 | |||
1545 | /* | ||
1546 | * When switching to -deadline, we may overload the rq, then | ||
1547 | * we try to push someone off, if possible. | ||
1548 | */ | ||
1549 | static void switched_to_dl(struct rq *rq, struct task_struct *p) | ||
1550 | { | ||
1551 | int check_resched = 1; | ||
1552 | |||
1553 | /* | ||
1554 | * If p is throttled, don't consider the possibility | ||
1555 | * of preempting rq->curr, the check will be done right | ||
1556 | * after its runtime gets replenished. | ||
1557 | */ | ||
1558 | if (unlikely(p->dl.dl_throttled)) | ||
1559 | return; | ||
1560 | |||
1561 | if (p->on_rq || rq->curr != p) { | ||
1562 | #ifdef CONFIG_SMP | ||
1563 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) | ||
1564 | /* Only reschedule if pushing failed */ | ||
1565 | check_resched = 0; | ||
1566 | #endif /* CONFIG_SMP */ | ||
1567 | if (check_resched && task_has_dl_policy(rq->curr)) | ||
1568 | check_preempt_curr_dl(rq, p, 0); | ||
1569 | } | ||
1570 | } | ||
1571 | |||
1572 | /* | ||
1573 | * If the scheduling parameters of a -deadline task changed, | ||
1574 | * a push or pull operation might be needed. | ||
1575 | */ | ||
1576 | static void prio_changed_dl(struct rq *rq, struct task_struct *p, | ||
1577 | int oldprio) | ||
1578 | { | ||
1579 | if (p->on_rq || rq->curr == p) { | ||
1580 | #ifdef CONFIG_SMP | ||
1581 | /* | ||
1582 | * This might be too much, but unfortunately | ||
1583 | * we don't have the old deadline value, and | ||
1584 | * we can't tell whether the task is increasing | ||
1585 | * or lowering its prio, so... | ||
1586 | */ | ||
1587 | if (!rq->dl.overloaded) | ||
1588 | pull_dl_task(rq); | ||
1589 | |||
1590 | /* | ||
1591 | * If we now have an earlier deadline task than p, | ||
1592 | * then reschedule, provided p is still on this | ||
1593 | * runqueue. | ||
1594 | */ | ||
1595 | if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && | ||
1596 | rq->curr == p) | ||
1597 | resched_task(p); | ||
1598 | #else | ||
1599 | /* | ||
1600 | * Again, we don't know if p has an earlier | ||
1601 | * or later deadline, so let's blindly set a | ||
1602 | * (maybe not needed) rescheduling point. | ||
1603 | */ | ||
1604 | resched_task(p); | ||
1605 | #endif /* CONFIG_SMP */ | ||
1606 | } else | ||
1607 | switched_to_dl(rq, p); | ||
1608 | } | ||
1609 | |||
1610 | const struct sched_class dl_sched_class = { | ||
1611 | .next = &rt_sched_class, | ||
1612 | .enqueue_task = enqueue_task_dl, | ||
1613 | .dequeue_task = dequeue_task_dl, | ||
1614 | .yield_task = yield_task_dl, | ||
1615 | |||
1616 | .check_preempt_curr = check_preempt_curr_dl, | ||
1617 | |||
1618 | .pick_next_task = pick_next_task_dl, | ||
1619 | .put_prev_task = put_prev_task_dl, | ||
1620 | |||
1621 | #ifdef CONFIG_SMP | ||
1622 | .select_task_rq = select_task_rq_dl, | ||
1623 | .set_cpus_allowed = set_cpus_allowed_dl, | ||
1624 | .rq_online = rq_online_dl, | ||
1625 | .rq_offline = rq_offline_dl, | ||
1626 | .pre_schedule = pre_schedule_dl, | ||
1627 | .post_schedule = post_schedule_dl, | ||
1628 | .task_woken = task_woken_dl, | ||
1629 | #endif | ||
1630 | |||
1631 | .set_curr_task = set_curr_task_dl, | ||
1632 | .task_tick = task_tick_dl, | ||
1633 | .task_fork = task_fork_dl, | ||
1634 | .task_dead = task_dead_dl, | ||
1635 | |||
1636 | .prio_changed = prio_changed_dl, | ||
1637 | .switched_from = switched_from_dl, | ||
1638 | .switched_to = switched_to_dl, | ||
1639 | }; | ||