author		Dario Faggioli <raistlin@linux.it>	2013-11-07 08:43:44 -0500
committer	Ingo Molnar <mingo@kernel.org>	2014-01-13 07:42:56 -0500
commit		2d3d891d3344159d5b452a645e355bbe29591e8b
tree		ab7c4ef00b48d68efa2d57cabf8c3c86160f2406 /kernel/sched
parent		fb00aca474405f4fa8a8519c3179fed722eabd83
sched/deadline: Add SCHED_DEADLINE inheritance logic
Some method is needed to deal with rt-mutexes and make sched_dl
interact with the current PI code. This raises non-trivial issues
that (in our view) need to be solved with some restructuring of
the PI code (i.e., going toward a proxy-execution-ish implementation).
That is under development; in the meanwhile, as a temporary solution,
what this commit does is:
- ensure a pi-lock owner with waiters is never throttled. Instead,
when it runs out of runtime it is immediately replenished and its
deadline is postponed;
- the scheduling parameters (relative deadline and default runtime)
used for those replenishments --during the whole time it holds the
pi-lock-- are the ones of the waiting task with the earliest deadline.
Acting this way, we provide some kind of boosting to the lock owner,
still using the existing (actually, slightly modified by the previous
commit) PI architecture.
We want to stress that this is a surely needed but far from clean
solution to the problem; in the end, it is mostly a way to restart
discussion within the community. So, as always, comments, ideas,
rants, etc. are welcome! :-)
Signed-off-by: Dario Faggioli <raistlin@linux.it>
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
[ Added !RT_MUTEXES build fix. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-11-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
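For illustration, here is a minimal userspace sketch of the rule described above: while it holds the pi-lock, the owner is replenished with the relative parameters of its earliest-deadline waiter. Everything below (the types, names and the params_for_replenish() helper) is hypothetical and only mirrors the idea, not the kernel's actual API:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical, simplified stand-in for the kernel's sched_dl_entity. */
struct dl_entity {
	uint64_t deadline;	/* absolute deadline */
	uint64_t dl_runtime;	/* runtime budget per period */
	uint64_t dl_deadline;	/* relative deadline */
};

/* Wraparound-safe "a is earlier than b", like the kernel's dl_time_before(). */
static bool before(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;
}

/*
 * Pick the parameters the boosted lock owner is replenished with:
 * the top waiter's, if the owner is not a -dl task itself or the
 * waiter has an earlier absolute deadline; otherwise the owner
 * keeps its own runtime and deadline.
 */
static const struct dl_entity *
params_for_replenish(bool owner_is_dl, const struct dl_entity *owner,
		     const struct dl_entity *top_waiter)
{
	if (top_waiter &&
	    (!owner_is_dl || before(top_waiter->deadline, owner->deadline)))
		return top_waiter;
	return owner;
}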
Diffstat (limited to 'kernel/sched')
 kernel/sched/core.c     | 36
 kernel/sched/deadline.c | 91
 kernel/sched/sched.h    | 14
 3 files changed, 97 insertions, 44 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index aebcc70b5c93..599ee3b11b44 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -947,7 +947,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 		if (prev_class->switched_from)
 			prev_class->switched_from(rq, p);
 		p->sched_class->switched_to(rq, p);
-	} else if (oldprio != p->prio)
+	} else if (oldprio != p->prio || dl_task(p))
 		p->sched_class->prio_changed(rq, p, oldprio);
 }
 
@@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	int oldprio, on_rq, running;
+	int oldprio, on_rq, running, enqueue_flag = 0;
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
@@ -2808,6 +2808,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	}
 
 	trace_sched_pi_setprio(p, prio);
+	p->pi_top_task = rt_mutex_get_top_task(p);
 	oldprio = p->prio;
 	prev_class = p->sched_class;
 	on_rq = p->on_rq;
@@ -2817,19 +2818,42 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
-	if (dl_prio(prio))
+	/*
+	 * Boosting condition are:
+	 * 1. -rt task is running and holds mutex A
+	 *      --> -dl task blocks on mutex A
+	 *
+	 * 2. -dl task is running and holds mutex A
+	 *      --> -dl task blocks on mutex A and could preempt the
+	 *          running task
+	 */
+	if (dl_prio(prio)) {
+		if (!dl_prio(p->normal_prio) || (p->pi_top_task &&
+			dl_entity_preempt(&p->pi_top_task->dl, &p->dl))) {
+			p->dl.dl_boosted = 1;
+			p->dl.dl_throttled = 0;
+			enqueue_flag = ENQUEUE_REPLENISH;
+		} else
+			p->dl.dl_boosted = 0;
 		p->sched_class = &dl_sched_class;
-	else if (rt_prio(prio))
+	} else if (rt_prio(prio)) {
+		if (dl_prio(oldprio))
+			p->dl.dl_boosted = 0;
+		if (oldprio < prio)
+			enqueue_flag = ENQUEUE_HEAD;
 		p->sched_class = &rt_sched_class;
-	else
+	} else {
+		if (dl_prio(oldprio))
+			p->dl.dl_boosted = 0;
 		p->sched_class = &fair_sched_class;
+	}
 
 	p->prio = prio;
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq)
-		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
+		enqueue_task(rq, p, enqueue_flag);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
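The net effect of the rt_mutex_setprio() changes above is the choice of enqueue_flag. A hedged restatement as a standalone helper (the ENQUEUE_* values and the pick_enqueue_flag() name are illustrative, not the kernel's; note that in the kernel a numerically larger prio means lower priority):

#include <stdbool.h>

/* Illustrative flag values; the kernel defines its own. */
#define ENQUEUE_HEAD		1
#define ENQUEUE_REPLENISH	2

/*
 * A freshly boosted -dl task is re-enqueued with an immediate
 * replenishment; a -rt task whose priority was lowered keeps the
 * old ENQUEUE_HEAD behaviour; everything else enqueues normally.
 */
static int pick_enqueue_flag(bool dl_boosted, bool is_rt, int oldprio, int prio)
{
	if (dl_boosted)
		return ENQUEUE_REPLENISH;
	if (is_rt && oldprio < prio)	/* deboosted: prio value grew */
		return ENQUEUE_HEAD;
	return 0;
}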
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3958bc576d67..7f6de4316990 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,20 +16,6 @@
  */
 #include "sched.h"
 
-static inline int dl_time_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
-/*
- * Tells if entity @a should preempt entity @b.
- */
-static inline
-int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
-{
-	return dl_time_before(a->deadline, b->deadline);
-}
-
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
 {
 	return container_of(dl_se, struct task_struct, dl);
@@ -242,7 +228,8 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
  * one, and to (try to!) reconcile itself with its own scheduling
  * parameters.
  */
-static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
+static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
+				       struct sched_dl_entity *pi_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -254,8 +241,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
 	 * future; in fact, we must consider execution overheads (time
 	 * spent on hardirq context, etc.).
 	 */
-	dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
-	dl_se->runtime = dl_se->dl_runtime;
+	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+	dl_se->runtime = pi_se->dl_runtime;
 	dl_se->dl_new = 0;
 }
 
@@ -277,11 +264,23 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
  * could happen are, typically, a entity voluntarily trying to overcome its
  * runtime, or it just underestimated it during sched_setscheduler_ex().
  */
-static void replenish_dl_entity(struct sched_dl_entity *dl_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se,
+				struct sched_dl_entity *pi_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
+	BUG_ON(pi_se->dl_runtime <= 0);
+
+	/*
+	 * This could be the case for a !-dl task that is boosted.
+	 * Just go with full inherited parameters.
+	 */
+	if (dl_se->dl_deadline == 0) {
+		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+		dl_se->runtime = pi_se->dl_runtime;
+	}
+
 	/*
 	 * We keep moving the deadline away until we get some
 	 * available runtime for the entity. This ensures correct
@@ -289,8 +288,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 	 * arbitrary large.
 	 */
 	while (dl_se->runtime <= 0) {
-		dl_se->deadline += dl_se->dl_period;
-		dl_se->runtime += dl_se->dl_runtime;
+		dl_se->deadline += pi_se->dl_period;
+		dl_se->runtime += pi_se->dl_runtime;
 	}
 
 	/*
@@ -309,8 +308,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 			lag_once = true;
 			printk_sched("sched: DL replenish lagged to much\n");
 		}
-		dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
-		dl_se->runtime = dl_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+		dl_se->runtime = pi_se->dl_runtime;
 	}
 }
 
@@ -337,7 +336,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
  * task with deadline equal to period this is the same of using
  * dl_deadline instead of dl_period in the equation above.
  */
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
+			       struct sched_dl_entity *pi_se, u64 t)
 {
 	u64 left, right;
 
@@ -359,8 +359,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
-	left = (dl_se->dl_period >> 10) * (dl_se->runtime >> 10);
-	right = ((dl_se->deadline - t) >> 10) * (dl_se->dl_runtime >> 10);
+	left = (pi_se->dl_period >> 10) * (dl_se->runtime >> 10);
+	right = ((dl_se->deadline - t) >> 10) * (pi_se->dl_runtime >> 10);
 
 	return dl_time_before(right, left);
 }
@@ -374,7 +374,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
  * - using the remaining runtime with the current deadline would make
  *   the entity exceed its bandwidth.
  */
-static void update_dl_entity(struct sched_dl_entity *dl_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se,
+			     struct sched_dl_entity *pi_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -384,14 +385,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
 	 * the actual scheduling parameters have to be "renewed".
 	 */
 	if (dl_se->dl_new) {
-		setup_new_dl_entity(dl_se);
+		setup_new_dl_entity(dl_se, pi_se);
 		return;
 	}
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
-	    dl_entity_overflow(dl_se, rq_clock(rq))) {
-		dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
-		dl_se->runtime = dl_se->dl_runtime;
+	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+		dl_se->runtime = pi_se->dl_runtime;
 	}
 }
 
@@ -405,7 +406,7 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
  * actually started or not (i.e., the replenishment instant is in
 * the future or in the past).
 */
-static int start_dl_timer(struct sched_dl_entity *dl_se)
+static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -414,6 +415,8 @@ static int start_dl_timer(struct sched_dl_entity *dl_se)
 	unsigned long range;
 	s64 delta;
 
+	if (boosted)
+		return 0;
 	/*
 	 * We want the timer to fire at the deadline, but considering
 	 * that it is actually coming from rq->clock and not from
@@ -573,7 +576,7 @@ static void update_curr_dl(struct rq *rq)
 	dl_se->runtime -= delta_exec;
 	if (dl_runtime_exceeded(rq, dl_se)) {
 		__dequeue_task_dl(rq, curr, 0);
-		if (likely(start_dl_timer(dl_se)))
+		if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
 			dl_se->dl_throttled = 1;
 		else
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
@@ -728,7 +731,8 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
 }
 
 static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se,
+		  struct sched_dl_entity *pi_se, int flags)
 {
 	BUG_ON(on_dl_rq(dl_se));
 
@@ -738,9 +742,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 	 * we want a replenishment of its runtime.
 	 */
 	if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)
-		replenish_dl_entity(dl_se);
+		replenish_dl_entity(dl_se, pi_se);
 	else
-		update_dl_entity(dl_se);
+		update_dl_entity(dl_se, pi_se);
 
 	__enqueue_dl_entity(dl_se);
 }
@@ -752,6 +756,18 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
+	struct task_struct *pi_task = rt_mutex_get_top_task(p);
+	struct sched_dl_entity *pi_se = &p->dl;
+
+	/*
+	 * Use the scheduling parameters of the top pi-waiter
+	 * task if we have one and its (relative) deadline is
+	 * smaller than our one... OTW we keep our runtime and
+	 * deadline.
+	 */
+	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))
+		pi_se = &pi_task->dl;
+
 	/*
 	 * If p is throttled, we do nothing. In fact, if it exhausted
 	 * its budget it needs a replenishment and, since it now is on
@@ -761,7 +777,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	if (p->dl.dl_throttled)
 		return;
 
-	enqueue_dl_entity(&p->dl, flags);
+	enqueue_dl_entity(&p->dl, pi_se, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
@@ -985,8 +1001,7 @@ static void task_dead_dl(struct task_struct *p)
 {
 	struct hrtimer *timer = &p->dl.dl_timer;
 
-	if (hrtimer_active(timer))
-		hrtimer_try_to_cancel(timer);
+	hrtimer_cancel(timer);
 }
 
 static void set_curr_task_dl(struct rq *rq)
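To see what the pi_se-based replenishment above does numerically, here is a throwaway userspace simulation of the loop; all numbers are invented:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* The boosted owner overran its budget by 3us... */
	int64_t runtime = -3000;		/* ns */
	uint64_t deadline = 100000;		/* absolute, ns */

	/* ...and pays it back with the *inherited* (top-waiter) parameters. */
	const uint64_t pi_runtime = 2000;	/* ns per period */
	const uint64_t pi_period = 10000;	/* ns */

	while (runtime <= 0) {
		deadline += pi_period;
		runtime += pi_runtime;
	}

	printf("deadline=%llu runtime=%lld\n",
	       (unsigned long long)deadline, (long long)runtime);
	return 0;
}

This prints deadline=120000 runtime=1000: two inherited periods are charged to absorb the overrun, which is exactly the "its deadline is postponed" behaviour from the changelog.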
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 93ea62754f11..52453a2d0a79 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -107,6 +107,20 @@ static inline int task_has_dl_policy(struct task_struct *p)
 	return dl_policy(p->policy);
 }
 
+static inline int dl_time_before(u64 a, u64 b)
+{
+	return (s64)(a - b) < 0;
+}
+
+/*
+ * Tells if entity @a should preempt entity @b.
+ */
+static inline
+int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
+{
+	return dl_time_before(a->deadline, b->deadline);
+}
+
 /*
  * This is the priority-queue data structure of the RT scheduling class:
  */
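dl_time_before(), now shared via sched.h, compares through a signed difference so the ordering stays correct even if the u64 clock values wrap; a quick standalone check (illustrative only):

#include <assert.h>
#include <stdint.h>

static int dl_time_before(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;
}

int main(void)
{
	assert(dl_time_before(100, 200));	/* plain ordering */
	assert(!dl_time_before(200, 100));
	/* b wrapped past UINT64_MAX; a naive a < b would say "no" here. */
	assert(dl_time_before(UINT64_MAX - 5, UINT64_MAX + 10));
	return 0;
}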