aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDario Faggioli <raistlin@linux.it>2013-11-07 08:43:44 -0500
committerIngo Molnar <mingo@kernel.org>2014-01-13 07:42:56 -0500
commit2d3d891d3344159d5b452a645e355bbe29591e8b (patch)
treeab7c4ef00b48d68efa2d57cabf8c3c86160f2406
parentfb00aca474405f4fa8a8519c3179fed722eabd83 (diff)
sched/deadline: Add SCHED_DEADLINE inheritance logic
Some method to deal with rt-mutexes and make sched_dl interact with the current PI-code is needed. This raises far-from-trivial issues that need (according to us) to be solved with some restructuring of the pi-code (i.e., going toward a proxy execution-ish implementation). This is under development; in the meanwhile, as a temporary solution, what this commit does is: - ensure a pi-lock owner with waiters is never throttled down. Instead, when it runs out of runtime, it immediately gets replenished and its deadline is postponed; - the scheduling parameters (relative deadline and default runtime) used for those replenishments --during the whole period it holds the pi-lock-- are the ones of the waiting task with the earliest deadline. Acting this way, we provide some kind of boosting to the lock-owner, still by using the existing (actually, slightly modified by the previous commit) pi-architecture. We would stress the fact that this is only a surely needed, but far from clean, solution to the problem. In the end it's only a way to re-start discussion within the community. So, as always, comments, ideas, rants, etc. are welcome! :-) Signed-off-by: Dario Faggioli <raistlin@linux.it> Signed-off-by: Juri Lelli <juri.lelli@gmail.com> [ Added !RT_MUTEXES build fix. ] Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1383831828-15501-11-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--include/linux/sched.h8
-rw-r--r--include/linux/sched/rt.h5
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/locking/rtmutex.c31
-rw-r--r--kernel/locking/rtmutex_common.h1
-rw-r--r--kernel/sched/core.c36
-rw-r--r--kernel/sched/deadline.c91
-rw-r--r--kernel/sched/sched.h14
-rw-r--r--kernel/trace/trace_sched_wakeup.c1
9 files changed, 134 insertions, 54 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9ea15019a5b6..13c53a99920f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1124,8 +1124,12 @@ struct sched_dl_entity {
1124 * @dl_new tells if a new instance arrived. If so we must 1124 * @dl_new tells if a new instance arrived. If so we must
1125 * start executing it with full runtime and reset its absolute 1125 * start executing it with full runtime and reset its absolute
1126 * deadline; 1126 * deadline;
1127 *
1128 * @dl_boosted tells if we are boosted due to DI. If so we are
1129 * outside bandwidth enforcement mechanism (but only until we
1130 * exit the critical section).
1127 */ 1131 */
1128 int dl_throttled, dl_new; 1132 int dl_throttled, dl_new, dl_boosted;
1129 1133
1130 /* 1134 /*
1131 * Bandwidth enforcement timer. Each -deadline task has its 1135 * Bandwidth enforcement timer. Each -deadline task has its
@@ -1359,6 +1363,8 @@ struct task_struct {
1359 struct rb_node *pi_waiters_leftmost; 1363 struct rb_node *pi_waiters_leftmost;
1360 /* Deadlock detection and priority inheritance handling */ 1364 /* Deadlock detection and priority inheritance handling */
1361 struct rt_mutex_waiter *pi_blocked_on; 1365 struct rt_mutex_waiter *pi_blocked_on;
1366 /* Top pi_waiters task */
1367 struct task_struct *pi_top_task;
1362#endif 1368#endif
1363 1369
1364#ifdef CONFIG_DEBUG_MUTEXES 1370#ifdef CONFIG_DEBUG_MUTEXES
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index 440434df3627..34e4ebea8fce 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -35,6 +35,7 @@ static inline int rt_task(struct task_struct *p)
35#ifdef CONFIG_RT_MUTEXES 35#ifdef CONFIG_RT_MUTEXES
36extern int rt_mutex_getprio(struct task_struct *p); 36extern int rt_mutex_getprio(struct task_struct *p);
37extern void rt_mutex_setprio(struct task_struct *p, int prio); 37extern void rt_mutex_setprio(struct task_struct *p, int prio);
38extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
38extern void rt_mutex_adjust_pi(struct task_struct *p); 39extern void rt_mutex_adjust_pi(struct task_struct *p);
39static inline bool tsk_is_pi_blocked(struct task_struct *tsk) 40static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
40{ 41{
@@ -45,6 +46,10 @@ static inline int rt_mutex_getprio(struct task_struct *p)
45{ 46{
46 return p->normal_prio; 47 return p->normal_prio;
47} 48}
49static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
50{
51 return NULL;
52}
48# define rt_mutex_adjust_pi(p) do { } while (0) 53# define rt_mutex_adjust_pi(p) do { } while (0)
49static inline bool tsk_is_pi_blocked(struct task_struct *tsk) 54static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
50{ 55{
diff --git a/kernel/fork.c b/kernel/fork.c
index 7049ae526a54..01b450a61abd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1090,6 +1090,7 @@ static void rt_mutex_init_task(struct task_struct *p)
1090 p->pi_waiters = RB_ROOT; 1090 p->pi_waiters = RB_ROOT;
1091 p->pi_waiters_leftmost = NULL; 1091 p->pi_waiters_leftmost = NULL;
1092 p->pi_blocked_on = NULL; 1092 p->pi_blocked_on = NULL;
1093 p->pi_top_task = NULL;
1093#endif 1094#endif
1094} 1095}
1095 1096
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3bf0aa68dd3f..2e960a2bab81 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -96,13 +96,16 @@ static inline int
96rt_mutex_waiter_less(struct rt_mutex_waiter *left, 96rt_mutex_waiter_less(struct rt_mutex_waiter *left,
97 struct rt_mutex_waiter *right) 97 struct rt_mutex_waiter *right)
98{ 98{
99 if (left->task->prio < right->task->prio) 99 if (left->prio < right->prio)
100 return 1; 100 return 1;
101 101
102 /* 102 /*
103 * If both tasks are dl_task(), we check their deadlines. 103 * If both waiters have dl_prio(), we check the deadlines of the
104 * associated tasks.
105 * If left waiter has a dl_prio(), and we didn't return 1 above,
106 * then right waiter has a dl_prio() too.
104 */ 107 */
105 if (dl_prio(left->task->prio) && dl_prio(right->task->prio)) 108 if (dl_prio(left->prio))
106 return (left->task->dl.deadline < right->task->dl.deadline); 109 return (left->task->dl.deadline < right->task->dl.deadline);
107 110
108 return 0; 111 return 0;
@@ -197,10 +200,18 @@ int rt_mutex_getprio(struct task_struct *task)
197 if (likely(!task_has_pi_waiters(task))) 200 if (likely(!task_has_pi_waiters(task)))
198 return task->normal_prio; 201 return task->normal_prio;
199 202
200 return min(task_top_pi_waiter(task)->task->prio, 203 return min(task_top_pi_waiter(task)->prio,
201 task->normal_prio); 204 task->normal_prio);
202} 205}
203 206
207struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
208{
209 if (likely(!task_has_pi_waiters(task)))
210 return NULL;
211
212 return task_top_pi_waiter(task)->task;
213}
214
204/* 215/*
205 * Adjust the priority of a task, after its pi_waiters got modified. 216 * Adjust the priority of a task, after its pi_waiters got modified.
206 * 217 *
@@ -210,7 +221,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
210{ 221{
211 int prio = rt_mutex_getprio(task); 222 int prio = rt_mutex_getprio(task);
212 223
213 if (task->prio != prio) 224 if (task->prio != prio || dl_prio(prio))
214 rt_mutex_setprio(task, prio); 225 rt_mutex_setprio(task, prio);
215} 226}
216 227
@@ -328,7 +339,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
328 * When deadlock detection is off then we check, if further 339 * When deadlock detection is off then we check, if further
329 * priority adjustment is necessary. 340 * priority adjustment is necessary.
330 */ 341 */
331 if (!detect_deadlock && waiter->task->prio == task->prio) 342 if (!detect_deadlock && waiter->prio == task->prio)
332 goto out_unlock_pi; 343 goto out_unlock_pi;
333 344
334 lock = waiter->lock; 345 lock = waiter->lock;
@@ -350,7 +361,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
350 361
351 /* Requeue the waiter */ 362 /* Requeue the waiter */
352 rt_mutex_dequeue(lock, waiter); 363 rt_mutex_dequeue(lock, waiter);
353 waiter->task->prio = task->prio; 364 waiter->prio = task->prio;
354 rt_mutex_enqueue(lock, waiter); 365 rt_mutex_enqueue(lock, waiter);
355 366
356 /* Release the task */ 367 /* Release the task */
@@ -448,7 +459,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
448 * 3) it is top waiter 459 * 3) it is top waiter
449 */ 460 */
450 if (rt_mutex_has_waiters(lock)) { 461 if (rt_mutex_has_waiters(lock)) {
451 if (task->prio >= rt_mutex_top_waiter(lock)->task->prio) { 462 if (task->prio >= rt_mutex_top_waiter(lock)->prio) {
452 if (!waiter || waiter != rt_mutex_top_waiter(lock)) 463 if (!waiter || waiter != rt_mutex_top_waiter(lock))
453 return 0; 464 return 0;
454 } 465 }
@@ -508,6 +519,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
508 __rt_mutex_adjust_prio(task); 519 __rt_mutex_adjust_prio(task);
509 waiter->task = task; 520 waiter->task = task;
510 waiter->lock = lock; 521 waiter->lock = lock;
522 waiter->prio = task->prio;
511 523
512 /* Get the top priority waiter on the lock */ 524 /* Get the top priority waiter on the lock */
513 if (rt_mutex_has_waiters(lock)) 525 if (rt_mutex_has_waiters(lock))
@@ -653,7 +665,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
653 raw_spin_lock_irqsave(&task->pi_lock, flags); 665 raw_spin_lock_irqsave(&task->pi_lock, flags);
654 666
655 waiter = task->pi_blocked_on; 667 waiter = task->pi_blocked_on;
656 if (!waiter || waiter->task->prio == task->prio) { 668 if (!waiter || (waiter->prio == task->prio &&
669 !dl_prio(task->prio))) {
657 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 670 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
658 return; 671 return;
659 } 672 }
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index b65442fe5ade..7431a9c86f35 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -54,6 +54,7 @@ struct rt_mutex_waiter {
54 struct pid *deadlock_task_pid; 54 struct pid *deadlock_task_pid;
55 struct rt_mutex *deadlock_lock; 55 struct rt_mutex *deadlock_lock;
56#endif 56#endif
57 int prio;
57}; 58};
58 59
59/* 60/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index aebcc70b5c93..599ee3b11b44 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -947,7 +947,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
947 if (prev_class->switched_from) 947 if (prev_class->switched_from)
948 prev_class->switched_from(rq, p); 948 prev_class->switched_from(rq, p);
949 p->sched_class->switched_to(rq, p); 949 p->sched_class->switched_to(rq, p);
950 } else if (oldprio != p->prio) 950 } else if (oldprio != p->prio || dl_task(p))
951 p->sched_class->prio_changed(rq, p, oldprio); 951 p->sched_class->prio_changed(rq, p, oldprio);
952} 952}
953 953
@@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
2781 */ 2781 */
2782void rt_mutex_setprio(struct task_struct *p, int prio) 2782void rt_mutex_setprio(struct task_struct *p, int prio)
2783{ 2783{
2784 int oldprio, on_rq, running; 2784 int oldprio, on_rq, running, enqueue_flag = 0;
2785 struct rq *rq; 2785 struct rq *rq;
2786 const struct sched_class *prev_class; 2786 const struct sched_class *prev_class;
2787 2787
@@ -2808,6 +2808,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
2808 } 2808 }
2809 2809
2810 trace_sched_pi_setprio(p, prio); 2810 trace_sched_pi_setprio(p, prio);
2811 p->pi_top_task = rt_mutex_get_top_task(p);
2811 oldprio = p->prio; 2812 oldprio = p->prio;
2812 prev_class = p->sched_class; 2813 prev_class = p->sched_class;
2813 on_rq = p->on_rq; 2814 on_rq = p->on_rq;
@@ -2817,19 +2818,42 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
2817 if (running) 2818 if (running)
2818 p->sched_class->put_prev_task(rq, p); 2819 p->sched_class->put_prev_task(rq, p);
2819 2820
2820 if (dl_prio(prio)) 2821 /*
2822 * Boosting condition are:
2823 * 1. -rt task is running and holds mutex A
2824 * --> -dl task blocks on mutex A
2825 *
2826 * 2. -dl task is running and holds mutex A
2827 * --> -dl task blocks on mutex A and could preempt the
2828 * running task
2829 */
2830 if (dl_prio(prio)) {
2831 if (!dl_prio(p->normal_prio) || (p->pi_top_task &&
2832 dl_entity_preempt(&p->pi_top_task->dl, &p->dl))) {
2833 p->dl.dl_boosted = 1;
2834 p->dl.dl_throttled = 0;
2835 enqueue_flag = ENQUEUE_REPLENISH;
2836 } else
2837 p->dl.dl_boosted = 0;
2821 p->sched_class = &dl_sched_class; 2838 p->sched_class = &dl_sched_class;
2822 else if (rt_prio(prio)) 2839 } else if (rt_prio(prio)) {
2840 if (dl_prio(oldprio))
2841 p->dl.dl_boosted = 0;
2842 if (oldprio < prio)
2843 enqueue_flag = ENQUEUE_HEAD;
2823 p->sched_class = &rt_sched_class; 2844 p->sched_class = &rt_sched_class;
2824 else 2845 } else {
2846 if (dl_prio(oldprio))
2847 p->dl.dl_boosted = 0;
2825 p->sched_class = &fair_sched_class; 2848 p->sched_class = &fair_sched_class;
2849 }
2826 2850
2827 p->prio = prio; 2851 p->prio = prio;
2828 2852
2829 if (running) 2853 if (running)
2830 p->sched_class->set_curr_task(rq); 2854 p->sched_class->set_curr_task(rq);
2831 if (on_rq) 2855 if (on_rq)
2832 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); 2856 enqueue_task(rq, p, enqueue_flag);
2833 2857
2834 check_class_changed(rq, p, prev_class, oldprio); 2858 check_class_changed(rq, p, prev_class, oldprio);
2835out_unlock: 2859out_unlock:
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3958bc576d67..7f6de4316990 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,20 +16,6 @@
16 */ 16 */
17#include "sched.h" 17#include "sched.h"
18 18
19static inline int dl_time_before(u64 a, u64 b)
20{
21 return (s64)(a - b) < 0;
22}
23
24/*
25 * Tells if entity @a should preempt entity @b.
26 */
27static inline
28int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
29{
30 return dl_time_before(a->deadline, b->deadline);
31}
32
33static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) 19static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
34{ 20{
35 return container_of(dl_se, struct task_struct, dl); 21 return container_of(dl_se, struct task_struct, dl);
@@ -242,7 +228,8 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
242 * one, and to (try to!) reconcile itself with its own scheduling 228 * one, and to (try to!) reconcile itself with its own scheduling
243 * parameters. 229 * parameters.
244 */ 230 */
245static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) 231static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
232 struct sched_dl_entity *pi_se)
246{ 233{
247 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 234 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
248 struct rq *rq = rq_of_dl_rq(dl_rq); 235 struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -254,8 +241,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
254 * future; in fact, we must consider execution overheads (time 241 * future; in fact, we must consider execution overheads (time
255 * spent on hardirq context, etc.). 242 * spent on hardirq context, etc.).
256 */ 243 */
257 dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; 244 dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
258 dl_se->runtime = dl_se->dl_runtime; 245 dl_se->runtime = pi_se->dl_runtime;
259 dl_se->dl_new = 0; 246 dl_se->dl_new = 0;
260} 247}
261 248
@@ -277,11 +264,23 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
277 * could happen are, typically, a entity voluntarily trying to overcome its 264 * could happen are, typically, a entity voluntarily trying to overcome its
278 * runtime, or it just underestimated it during sched_setscheduler_ex(). 265 * runtime, or it just underestimated it during sched_setscheduler_ex().
279 */ 266 */
280static void replenish_dl_entity(struct sched_dl_entity *dl_se) 267static void replenish_dl_entity(struct sched_dl_entity *dl_se,
268 struct sched_dl_entity *pi_se)
281{ 269{
282 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 270 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
283 struct rq *rq = rq_of_dl_rq(dl_rq); 271 struct rq *rq = rq_of_dl_rq(dl_rq);
284 272
273 BUG_ON(pi_se->dl_runtime <= 0);
274
275 /*
276 * This could be the case for a !-dl task that is boosted.
277 * Just go with full inherited parameters.
278 */
279 if (dl_se->dl_deadline == 0) {
280 dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
281 dl_se->runtime = pi_se->dl_runtime;
282 }
283
285 /* 284 /*
286 * We keep moving the deadline away until we get some 285 * We keep moving the deadline away until we get some
287 * available runtime for the entity. This ensures correct 286 * available runtime for the entity. This ensures correct
@@ -289,8 +288,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
289 * arbitrary large. 288 * arbitrary large.
290 */ 289 */
291 while (dl_se->runtime <= 0) { 290 while (dl_se->runtime <= 0) {
292 dl_se->deadline += dl_se->dl_period; 291 dl_se->deadline += pi_se->dl_period;
293 dl_se->runtime += dl_se->dl_runtime; 292 dl_se->runtime += pi_se->dl_runtime;
294 } 293 }
295 294
296 /* 295 /*
@@ -309,8 +308,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
309 lag_once = true; 308 lag_once = true;
310 printk_sched("sched: DL replenish lagged to much\n"); 309 printk_sched("sched: DL replenish lagged to much\n");
311 } 310 }
312 dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; 311 dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
313 dl_se->runtime = dl_se->dl_runtime; 312 dl_se->runtime = pi_se->dl_runtime;
314 } 313 }
315} 314}
316 315
@@ -337,7 +336,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
337 * task with deadline equal to period this is the same of using 336 * task with deadline equal to period this is the same of using
338 * dl_deadline instead of dl_period in the equation above. 337 * dl_deadline instead of dl_period in the equation above.
339 */ 338 */
340static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t) 339static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
340 struct sched_dl_entity *pi_se, u64 t)
341{ 341{
342 u64 left, right; 342 u64 left, right;
343 343
@@ -359,8 +359,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
359 * of anything below microseconds resolution is actually fiction 359 * of anything below microseconds resolution is actually fiction
360 * (but still we want to give the user that illusion >;). 360 * (but still we want to give the user that illusion >;).
361 */ 361 */
362 left = (dl_se->dl_period >> 10) * (dl_se->runtime >> 10); 362 left = (pi_se->dl_period >> 10) * (dl_se->runtime >> 10);
363 right = ((dl_se->deadline - t) >> 10) * (dl_se->dl_runtime >> 10); 363 right = ((dl_se->deadline - t) >> 10) * (pi_se->dl_runtime >> 10);
364 364
365 return dl_time_before(right, left); 365 return dl_time_before(right, left);
366} 366}
@@ -374,7 +374,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
374 * - using the remaining runtime with the current deadline would make 374 * - using the remaining runtime with the current deadline would make
375 * the entity exceed its bandwidth. 375 * the entity exceed its bandwidth.
376 */ 376 */
377static void update_dl_entity(struct sched_dl_entity *dl_se) 377static void update_dl_entity(struct sched_dl_entity *dl_se,
378 struct sched_dl_entity *pi_se)
378{ 379{
379 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 380 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
380 struct rq *rq = rq_of_dl_rq(dl_rq); 381 struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -384,14 +385,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
384 * the actual scheduling parameters have to be "renewed". 385 * the actual scheduling parameters have to be "renewed".
385 */ 386 */
386 if (dl_se->dl_new) { 387 if (dl_se->dl_new) {
387 setup_new_dl_entity(dl_se); 388 setup_new_dl_entity(dl_se, pi_se);
388 return; 389 return;
389 } 390 }
390 391
391 if (dl_time_before(dl_se->deadline, rq_clock(rq)) || 392 if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
392 dl_entity_overflow(dl_se, rq_clock(rq))) { 393 dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
393 dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; 394 dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
394 dl_se->runtime = dl_se->dl_runtime; 395 dl_se->runtime = pi_se->dl_runtime;
395 } 396 }
396} 397}
397 398
@@ -405,7 +406,7 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
405 * actually started or not (i.e., the replenishment instant is in 406 * actually started or not (i.e., the replenishment instant is in
406 * the future or in the past). 407 * the future or in the past).
407 */ 408 */
408static int start_dl_timer(struct sched_dl_entity *dl_se) 409static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
409{ 410{
410 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 411 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
411 struct rq *rq = rq_of_dl_rq(dl_rq); 412 struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -414,6 +415,8 @@ static int start_dl_timer(struct sched_dl_entity *dl_se)
414 unsigned long range; 415 unsigned long range;
415 s64 delta; 416 s64 delta;
416 417
418 if (boosted)
419 return 0;
417 /* 420 /*
418 * We want the timer to fire at the deadline, but considering 421 * We want the timer to fire at the deadline, but considering
419 * that it is actually coming from rq->clock and not from 422 * that it is actually coming from rq->clock and not from
@@ -573,7 +576,7 @@ static void update_curr_dl(struct rq *rq)
573 dl_se->runtime -= delta_exec; 576 dl_se->runtime -= delta_exec;
574 if (dl_runtime_exceeded(rq, dl_se)) { 577 if (dl_runtime_exceeded(rq, dl_se)) {
575 __dequeue_task_dl(rq, curr, 0); 578 __dequeue_task_dl(rq, curr, 0);
576 if (likely(start_dl_timer(dl_se))) 579 if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
577 dl_se->dl_throttled = 1; 580 dl_se->dl_throttled = 1;
578 else 581 else
579 enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); 582 enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
@@ -728,7 +731,8 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
728} 731}
729 732
730static void 733static void
731enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) 734enqueue_dl_entity(struct sched_dl_entity *dl_se,
735 struct sched_dl_entity *pi_se, int flags)
732{ 736{
733 BUG_ON(on_dl_rq(dl_se)); 737 BUG_ON(on_dl_rq(dl_se));
734 738
@@ -738,9 +742,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
738 * we want a replenishment of its runtime. 742 * we want a replenishment of its runtime.
739 */ 743 */
740 if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) 744 if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)
741 replenish_dl_entity(dl_se); 745 replenish_dl_entity(dl_se, pi_se);
742 else 746 else
743 update_dl_entity(dl_se); 747 update_dl_entity(dl_se, pi_se);
744 748
745 __enqueue_dl_entity(dl_se); 749 __enqueue_dl_entity(dl_se);
746} 750}
@@ -752,6 +756,18 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
752 756
753static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) 757static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
754{ 758{
759 struct task_struct *pi_task = rt_mutex_get_top_task(p);
760 struct sched_dl_entity *pi_se = &p->dl;
761
762 /*
763 * Use the scheduling parameters of the top pi-waiter
764 * task if we have one and its (relative) deadline is
765 * smaller than our one... OTW we keep our runtime and
766 * deadline.
767 */
768 if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))
769 pi_se = &pi_task->dl;
770
755 /* 771 /*
756 * If p is throttled, we do nothing. In fact, if it exhausted 772 * If p is throttled, we do nothing. In fact, if it exhausted
757 * its budget it needs a replenishment and, since it now is on 773 * its budget it needs a replenishment and, since it now is on
@@ -761,7 +777,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
761 if (p->dl.dl_throttled) 777 if (p->dl.dl_throttled)
762 return; 778 return;
763 779
764 enqueue_dl_entity(&p->dl, flags); 780 enqueue_dl_entity(&p->dl, pi_se, flags);
765 781
766 if (!task_current(rq, p) && p->nr_cpus_allowed > 1) 782 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
767 enqueue_pushable_dl_task(rq, p); 783 enqueue_pushable_dl_task(rq, p);
@@ -985,8 +1001,7 @@ static void task_dead_dl(struct task_struct *p)
985{ 1001{
986 struct hrtimer *timer = &p->dl.dl_timer; 1002 struct hrtimer *timer = &p->dl.dl_timer;
987 1003
988 if (hrtimer_active(timer)) 1004 hrtimer_cancel(timer);
989 hrtimer_try_to_cancel(timer);
990} 1005}
991 1006
992static void set_curr_task_dl(struct rq *rq) 1007static void set_curr_task_dl(struct rq *rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 93ea62754f11..52453a2d0a79 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -107,6 +107,20 @@ static inline int task_has_dl_policy(struct task_struct *p)
107 return dl_policy(p->policy); 107 return dl_policy(p->policy);
108} 108}
109 109
110static inline int dl_time_before(u64 a, u64 b)
111{
112 return (s64)(a - b) < 0;
113}
114
115/*
116 * Tells if entity @a should preempt entity @b.
117 */
118static inline
119int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
120{
121 return dl_time_before(a->deadline, b->deadline);
122}
123
110/* 124/*
111 * This is the priority-queue data structure of the RT scheduling class: 125 * This is the priority-queue data structure of the RT scheduling class:
112 */ 126 */
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 090c4d9dcf16..6e32635e5e57 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -16,6 +16,7 @@
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/sched/rt.h> 18#include <linux/sched/rt.h>
19#include <linux/sched/deadline.h>
19#include <trace/events/sched.h> 20#include <trace/events/sched.h>
20#include "trace.h" 21#include "trace.h"
21 22