author     Dario Faggioli <raistlin@linux.it>    2013-11-07 08:43:44 -0500
committer  Ingo Molnar <mingo@kernel.org>        2014-01-13 07:42:56 -0500
commit     2d3d891d3344159d5b452a645e355bbe29591e8b
tree       ab7c4ef00b48d68efa2d57cabf8c3c86160f2406
parent     fb00aca474405f4fa8a8519c3179fed722eabd83
sched/deadline: Add SCHED_DEADLINE inheritance logic
Some method of dealing with rt-mutexes and making sched_dl interact with
the current PI code is needed. This raises non-trivial issues that, in our
view, need to be solved by restructuring the pi-code (i.e., by moving
toward a proxy-execution-like implementation).

That work is still under development; in the meantime, as a temporary
solution, this commit does the following:

 - ensure a pi-lock owner with waiters is never throttled down. Instead,
   when it runs out of runtime, it immediately gets replenished and its
   deadline is postponed;

 - the scheduling parameters (relative deadline and default runtime)
   used for those replenishments -- during the whole period it holds the
   pi-lock -- are the ones of the waiting task with the earliest deadline.

Acting this way, we provide a form of boosting to the lock owner, while
still using the existing pi-architecture (slightly modified by the
previous commit).

We want to stress that this is a surely needed, but far from clean,
solution to the problem. In the end it is mainly a way to restart the
discussion within the community. So, as always, comments, ideas, rants,
etc. are welcome! :-)
Signed-off-by: Dario Faggioli <raistlin@linux.it>
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
[ Added !RT_MUTEXES build fix. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-11-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 include/linux/sched.h             |  8
 include/linux/sched/rt.h          |  5
 kernel/fork.c                     |  1
 kernel/locking/rtmutex.c          | 31
 kernel/locking/rtmutex_common.h   |  1
 kernel/sched/core.c               | 36
 kernel/sched/deadline.c           | 91
 kernel/sched/sched.h              | 14
 kernel/trace/trace_sched_wakeup.c |  1
 9 files changed, 134 insertions(+), 54 deletions(-)
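To make the boosting rule described in the commit message concrete, here is a
minimal user-space sketch (not kernel code, and not part of this patch): when a
boosted lock owner exhausts its runtime it is not throttled, but replenished
using the parameters of the earliest-deadline waiter. The struct fields and the
replenish() helper only loosely mirror kernel/sched/deadline.c; the program
itself is hypothetical.

/*
 * Minimal user-space sketch (NOT kernel code) of the deadline-inheritance
 * rule above: a boosted lock owner that runs out of runtime is replenished
 * from the top waiter's parameters instead of being throttled.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct dl_entity {
	int64_t  runtime;	/* remaining runtime */
	uint64_t deadline;	/* absolute deadline */
	uint64_t dl_runtime;	/* per-period runtime budget */
	uint64_t dl_deadline;	/* relative deadline */
	bool     dl_boosted;	/* inside a boosted critical section? */
};

/* Replenish @dl_se from @pi_se (its own parameters, or the top waiter's
 * while boosted), pushing the absolute deadline forward as needed. */
static void replenish(struct dl_entity *dl_se, const struct dl_entity *pi_se,
		      uint64_t now)
{
	while (dl_se->runtime <= 0) {
		dl_se->deadline += pi_se->dl_deadline;
		dl_se->runtime += (int64_t)pi_se->dl_runtime;
	}
	if (dl_se->deadline <= now) {
		dl_se->deadline = now + pi_se->dl_deadline;
		dl_se->runtime = (int64_t)pi_se->dl_runtime;
	}
}

int main(void)
{
	uint64_t now = 1000;
	struct dl_entity owner  = { .runtime = -50, .deadline = 900,
				    .dl_runtime = 100, .dl_deadline = 400,
				    .dl_boosted = true };
	struct dl_entity waiter = { .dl_runtime = 200, .dl_deadline = 250 };

	/* The owner exhausted its budget while holding the lock. Because it
	 * is boosted, do not throttle: replenish with the waiter's params. */
	const struct dl_entity *pi_se = owner.dl_boosted ? &waiter : &owner;

	replenish(&owner, pi_se, now);
	printf("owner: runtime=%lld deadline=%llu (boosted=%d)\n",
	       (long long)owner.runtime, (unsigned long long)owner.deadline,
	       owner.dl_boosted);
	return 0;
}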
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9ea15019a5b6..13c53a99920f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1124,8 +1124,12 @@ struct sched_dl_entity {
 	 * @dl_new tells if a new instance arrived. If so we must
 	 * start executing it with full runtime and reset its absolute
 	 * deadline;
+	 *
+	 * @dl_boosted tells if we are boosted due to DI. If so we are
+	 * outside bandwidth enforcement mechanism (but only until we
+	 * exit the critical section).
 	 */
-	int dl_throttled, dl_new;
+	int dl_throttled, dl_new, dl_boosted;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
@@ -1359,6 +1363,8 @@ struct task_struct {
 	struct rb_node *pi_waiters_leftmost;
 	/* Deadlock detection and priority inheritance handling */
 	struct rt_mutex_waiter *pi_blocked_on;
+	/* Top pi_waiters task */
+	struct task_struct *pi_top_task;
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index 440434df3627..34e4ebea8fce 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -35,6 +35,7 @@ static inline int rt_task(struct task_struct *p)
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 {
@@ -45,6 +46,10 @@ static inline int rt_mutex_getprio(struct task_struct *p)
 {
 	return p->normal_prio;
 }
+static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+{
+	return NULL;
+}
 # define rt_mutex_adjust_pi(p) do { } while (0)
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 7049ae526a54..01b450a61abd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1090,6 +1090,7 @@ static void rt_mutex_init_task(struct task_struct *p)
 	p->pi_waiters = RB_ROOT;
 	p->pi_waiters_leftmost = NULL;
 	p->pi_blocked_on = NULL;
+	p->pi_top_task = NULL;
 #endif
 }
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3bf0aa68dd3f..2e960a2bab81 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -96,13 +96,16 @@ static inline int
 rt_mutex_waiter_less(struct rt_mutex_waiter *left,
 		     struct rt_mutex_waiter *right)
 {
-	if (left->task->prio < right->task->prio)
+	if (left->prio < right->prio)
 		return 1;
 
 	/*
-	 * If both tasks are dl_task(), we check their deadlines.
+	 * If both waiters have dl_prio(), we check the deadlines of the
+	 * associated tasks.
+	 * If left waiter has a dl_prio(), and we didn't return 1 above,
+	 * then right waiter has a dl_prio() too.
 	 */
-	if (dl_prio(left->task->prio) && dl_prio(right->task->prio))
+	if (dl_prio(left->prio))
 		return (left->task->dl.deadline < right->task->dl.deadline);
 
 	return 0;
@@ -197,10 +200,18 @@ int rt_mutex_getprio(struct task_struct *task)
 	if (likely(!task_has_pi_waiters(task)))
 		return task->normal_prio;
 
-	return min(task_top_pi_waiter(task)->task->prio,
+	return min(task_top_pi_waiter(task)->prio,
 		   task->normal_prio);
 }
 
+struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+{
+	if (likely(!task_has_pi_waiters(task)))
+		return NULL;
+
+	return task_top_pi_waiter(task)->task;
+}
+
 /*
  * Adjust the priority of a task, after its pi_waiters got modified.
  *
@@ -210,7 +221,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
 {
 	int prio = rt_mutex_getprio(task);
 
-	if (task->prio != prio)
+	if (task->prio != prio || dl_prio(prio))
 		rt_mutex_setprio(task, prio);
 }
 
@@ -328,7 +339,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * When deadlock detection is off then we check, if further
 	 * priority adjustment is necessary.
 	 */
-	if (!detect_deadlock && waiter->task->prio == task->prio)
+	if (!detect_deadlock && waiter->prio == task->prio)
 		goto out_unlock_pi;
 
 	lock = waiter->lock;
@@ -350,7 +361,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 
 	/* Requeue the waiter */
 	rt_mutex_dequeue(lock, waiter);
-	waiter->task->prio = task->prio;
+	waiter->prio = task->prio;
 	rt_mutex_enqueue(lock, waiter);
 
 	/* Release the task */
@@ -448,7 +459,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 	 * 3) it is top waiter
 	 */
 	if (rt_mutex_has_waiters(lock)) {
-		if (task->prio >= rt_mutex_top_waiter(lock)->task->prio) {
+		if (task->prio >= rt_mutex_top_waiter(lock)->prio) {
 			if (!waiter || waiter != rt_mutex_top_waiter(lock))
 				return 0;
 		}
@@ -508,6 +519,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	__rt_mutex_adjust_prio(task);
 	waiter->task = task;
 	waiter->lock = lock;
+	waiter->prio = task->prio;
 
 	/* Get the top priority waiter on the lock */
 	if (rt_mutex_has_waiters(lock))
@@ -653,7 +665,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 
 	waiter = task->pi_blocked_on;
-	if (!waiter || waiter->task->prio == task->prio) {
+	if (!waiter || (waiter->prio == task->prio &&
+			!dl_prio(task->prio))) {
 		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		return;
 	}
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index b65442fe5ade..7431a9c86f35 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -54,6 +54,7 @@ struct rt_mutex_waiter {
 	struct pid *deadlock_task_pid;
 	struct rt_mutex *deadlock_lock;
 #endif
+	int prio;
 };
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index aebcc70b5c93..599ee3b11b44 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -947,7 +947,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 		if (prev_class->switched_from)
 			prev_class->switched_from(rq, p);
 		p->sched_class->switched_to(rq, p);
-	} else if (oldprio != p->prio)
+	} else if (oldprio != p->prio || dl_task(p))
 		p->sched_class->prio_changed(rq, p, oldprio);
 }
 
@@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	int oldprio, on_rq, running;
+	int oldprio, on_rq, running, enqueue_flag = 0;
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
@@ -2808,6 +2808,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	}
 
 	trace_sched_pi_setprio(p, prio);
+	p->pi_top_task = rt_mutex_get_top_task(p);
 	oldprio = p->prio;
 	prev_class = p->sched_class;
 	on_rq = p->on_rq;
@@ -2817,19 +2818,42 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
-	if (dl_prio(prio))
+	/*
+	 * Boosting condition are:
+	 * 1. -rt task is running and holds mutex A
+	 *      --> -dl task blocks on mutex A
+	 *
+	 * 2. -dl task is running and holds mutex A
+	 *      --> -dl task blocks on mutex A and could preempt the
+	 *          running task
+	 */
+	if (dl_prio(prio)) {
+		if (!dl_prio(p->normal_prio) || (p->pi_top_task &&
+			dl_entity_preempt(&p->pi_top_task->dl, &p->dl))) {
+			p->dl.dl_boosted = 1;
+			p->dl.dl_throttled = 0;
+			enqueue_flag = ENQUEUE_REPLENISH;
+		} else
+			p->dl.dl_boosted = 0;
 		p->sched_class = &dl_sched_class;
-	else if (rt_prio(prio))
+	} else if (rt_prio(prio)) {
+		if (dl_prio(oldprio))
+			p->dl.dl_boosted = 0;
+		if (oldprio < prio)
+			enqueue_flag = ENQUEUE_HEAD;
 		p->sched_class = &rt_sched_class;
-	else
+	} else {
+		if (dl_prio(oldprio))
+			p->dl.dl_boosted = 0;
 		p->sched_class = &fair_sched_class;
+	}
 
 	p->prio = prio;
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq)
-		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
+		enqueue_task(rq, p, enqueue_flag);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3958bc576d67..7f6de4316990 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,20 +16,6 @@
  */
 #include "sched.h"
 
-static inline int dl_time_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
-/*
- * Tells if entity @a should preempt entity @b.
- */
-static inline
-int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
-{
-	return dl_time_before(a->deadline, b->deadline);
-}
-
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
 {
 	return container_of(dl_se, struct task_struct, dl);
@@ -242,7 +228,8 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
  * one, and to (try to!) reconcile itself with its own scheduling
  * parameters.
  */
-static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
+static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
+				       struct sched_dl_entity *pi_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -254,8 +241,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
 	 * future; in fact, we must consider execution overheads (time
 	 * spent on hardirq context, etc.).
 	 */
-	dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
-	dl_se->runtime = dl_se->dl_runtime;
+	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+	dl_se->runtime = pi_se->dl_runtime;
 	dl_se->dl_new = 0;
 }
 
@@ -277,11 +264,23 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
 * could happen are, typically, a entity voluntarily trying to overcome its
 * runtime, or it just underestimated it during sched_setscheduler_ex().
 */
-static void replenish_dl_entity(struct sched_dl_entity *dl_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se,
+				struct sched_dl_entity *pi_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
+	BUG_ON(pi_se->dl_runtime <= 0);
+
+	/*
+	 * This could be the case for a !-dl task that is boosted.
+	 * Just go with full inherited parameters.
+	 */
+	if (dl_se->dl_deadline == 0) {
+		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+		dl_se->runtime = pi_se->dl_runtime;
+	}
+
 	/*
 	 * We keep moving the deadline away until we get some
 	 * available runtime for the entity. This ensures correct
@@ -289,8 +288,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 	 * arbitrary large.
 	 */
 	while (dl_se->runtime <= 0) {
-		dl_se->deadline += dl_se->dl_period;
-		dl_se->runtime += dl_se->dl_runtime;
+		dl_se->deadline += pi_se->dl_period;
+		dl_se->runtime += pi_se->dl_runtime;
 	}
 
 	/*
@@ -309,8 +308,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 			lag_once = true;
 			printk_sched("sched: DL replenish lagged to much\n");
 		}
-		dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
-		dl_se->runtime = dl_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+		dl_se->runtime = pi_se->dl_runtime;
 	}
 }
 
@@ -337,7 +336,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 * task with deadline equal to period this is the same of using
 * dl_deadline instead of dl_period in the equation above.
 */
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
+			       struct sched_dl_entity *pi_se, u64 t)
 {
 	u64 left, right;
 
@@ -359,8 +359,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
-	left = (dl_se->dl_period >> 10) * (dl_se->runtime >> 10);
-	right = ((dl_se->deadline - t) >> 10) * (dl_se->dl_runtime >> 10);
+	left = (pi_se->dl_period >> 10) * (dl_se->runtime >> 10);
+	right = ((dl_se->deadline - t) >> 10) * (pi_se->dl_runtime >> 10);
 
 	return dl_time_before(right, left);
 }
@@ -374,7 +374,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 * - using the remaining runtime with the current deadline would make
 *   the entity exceed its bandwidth.
 */
-static void update_dl_entity(struct sched_dl_entity *dl_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se,
+			     struct sched_dl_entity *pi_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -384,14 +385,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
 	 * the actual scheduling parameters have to be "renewed".
 	 */
 	if (dl_se->dl_new) {
-		setup_new_dl_entity(dl_se);
+		setup_new_dl_entity(dl_se, pi_se);
 		return;
 	}
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
-	    dl_entity_overflow(dl_se, rq_clock(rq))) {
-		dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
-		dl_se->runtime = dl_se->dl_runtime;
+	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
+		dl_se->runtime = pi_se->dl_runtime;
 	}
 }
 
@@ -405,7 +406,7 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
 * actually started or not (i.e., the replenishment instant is in
 * the future or in the past).
 */
-static int start_dl_timer(struct sched_dl_entity *dl_se)
+static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -414,6 +415,8 @@ static int start_dl_timer(struct sched_dl_entity *dl_se)
 	unsigned long range;
 	s64 delta;
 
+	if (boosted)
+		return 0;
 	/*
 	 * We want the timer to fire at the deadline, but considering
 	 * that it is actually coming from rq->clock and not from
@@ -573,7 +576,7 @@ static void update_curr_dl(struct rq *rq)
 	dl_se->runtime -= delta_exec;
 	if (dl_runtime_exceeded(rq, dl_se)) {
 		__dequeue_task_dl(rq, curr, 0);
-		if (likely(start_dl_timer(dl_se)))
+		if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
 			dl_se->dl_throttled = 1;
 		else
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
@@ -728,7 +731,8 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
 }
 
 static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se,
+		  struct sched_dl_entity *pi_se, int flags)
 {
 	BUG_ON(on_dl_rq(dl_se));
 
@@ -738,9 +742,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 	 * we want a replenishment of its runtime.
 	 */
 	if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)
-		replenish_dl_entity(dl_se);
+		replenish_dl_entity(dl_se, pi_se);
 	else
-		update_dl_entity(dl_se);
+		update_dl_entity(dl_se, pi_se);
 
 	__enqueue_dl_entity(dl_se);
 }
@@ -752,6 +756,18 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
+	struct task_struct *pi_task = rt_mutex_get_top_task(p);
+	struct sched_dl_entity *pi_se = &p->dl;
+
+	/*
+	 * Use the scheduling parameters of the top pi-waiter
+	 * task if we have one and its (relative) deadline is
+	 * smaller than our one... OTW we keep our runtime and
+	 * deadline.
+	 */
+	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))
+		pi_se = &pi_task->dl;
+
 	/*
 	 * If p is throttled, we do nothing. In fact, if it exhausted
 	 * its budget it needs a replenishment and, since it now is on
@@ -761,7 +777,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	if (p->dl.dl_throttled)
 		return;
 
-	enqueue_dl_entity(&p->dl, flags);
+	enqueue_dl_entity(&p->dl, pi_se, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
@@ -985,8 +1001,7 @@ static void task_dead_dl(struct task_struct *p)
 {
 	struct hrtimer *timer = &p->dl.dl_timer;
 
-	if (hrtimer_active(timer))
-		hrtimer_try_to_cancel(timer);
+	hrtimer_cancel(timer);
 }
 
 static void set_curr_task_dl(struct rq *rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 93ea62754f11..52453a2d0a79 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -107,6 +107,20 @@ static inline int task_has_dl_policy(struct task_struct *p)
 	return dl_policy(p->policy);
 }
 
+static inline int dl_time_before(u64 a, u64 b)
+{
+	return (s64)(a - b) < 0;
+}
+
+/*
+ * Tells if entity @a should preempt entity @b.
+ */
+static inline
+int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
+{
+	return dl_time_before(a->deadline, b->deadline);
+}
+
 /*
  * This is the priority-queue data structure of the RT scheduling class:
  */
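As a side note on the helpers moved into kernel/sched/sched.h above:
dl_time_before() compares absolute deadlines with wraparound-safe signed
arithmetic, which dl_entity_preempt() then uses to decide preemption. A tiny
stand-alone check of that behaviour (hypothetical test, not part of the patch):

/* Hypothetical user-space check of the wraparound-safe deadline comparison. */
#include <assert.h>
#include <stdint.h>

static inline int dl_time_before(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;
}

int main(void)
{
	/* Plain case: an earlier absolute deadline "is before" a later one. */
	assert(dl_time_before(100, 200));
	/* Wraparound case: a deadline just past UINT64_MAX still compares as
	 * later than one just before it, which a plain '<' would get wrong. */
	assert(dl_time_before(UINT64_MAX - 5, 5));
	assert(!dl_time_before(5, UINT64_MAX - 5));
	return 0;
}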
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 090c4d9dcf16..6e32635e5e57 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/sched/rt.h>
+#include <linux/sched/deadline.h>
 #include <trace/events/sched.h>
 #include "trace.h"
 