diff options
author | Peter Zijlstra <peterz@infradead.org> | 2017-03-23 10:56:11 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2017-04-04 05:44:06 -0400 |
commit | acd58620e415aee4a43a808d7d2fd87259ee0001 (patch) | |
tree | b0971a53edac32523a6b99b4bd5f15200041634e /kernel/locking/rtmutex.c | |
parent | aa2bfe55366552cb7e93e8709d66e698d79ccc47 (diff) |
sched/rtmutex: Refactor rt_mutex_setprio()
With the introduction of SCHED_DEADLINE the whole notion that priority
is a single number is gone, therefore the @prio argument to
rt_mutex_setprio() doesn't make sense anymore.
So rework the code to pass a pi_task instead.
Note this also fixes a problem with pi_top_task caching; previously we
would not set the pointer (call rt_mutex_update_top_task) if the
priority didn't change, which could lead to a stale pointer.
As for the XXX, I think it's fine to use pi_task->prio, because if it
differs from waiter->prio, a PI chain update is imminent.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/locking/rtmutex.c')
-rw-r--r-- | kernel/locking/rtmutex.c | 112 |
1 files changed, 30 insertions, 82 deletions
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 4b1015ef0dc7..00b49cdbb4e0 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
@@ -322,67 +322,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) | |||
322 | RB_CLEAR_NODE(&waiter->pi_tree_entry); | 322 | RB_CLEAR_NODE(&waiter->pi_tree_entry); |
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | static void rt_mutex_adjust_prio(struct task_struct *p) |
326 | * Must hold both p->pi_lock and task_rq(p)->lock. | ||
327 | */ | ||
328 | void rt_mutex_update_top_task(struct task_struct *p) | ||
329 | { | ||
330 | if (!task_has_pi_waiters(p)) { | ||
331 | p->pi_top_task = NULL; | ||
332 | return; | ||
333 | } | ||
334 | |||
335 | p->pi_top_task = task_top_pi_waiter(p)->task; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Calculate task priority from the waiter tree priority | ||
340 | * | ||
341 | * Return task->normal_prio when the waiter tree is empty or when | ||
342 | * the waiter is not allowed to do priority boosting | ||
343 | */ | ||
344 | int rt_mutex_getprio(struct task_struct *task) | ||
345 | { | ||
346 | if (likely(!task_has_pi_waiters(task))) | ||
347 | return task->normal_prio; | ||
348 | |||
349 | return min(task_top_pi_waiter(task)->prio, | ||
350 | task->normal_prio); | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Must hold either p->pi_lock or task_rq(p)->lock. | ||
355 | */ | ||
356 | struct task_struct *rt_mutex_get_top_task(struct task_struct *task) | ||
357 | { | ||
358 | return task->pi_top_task; | ||
359 | } | ||
360 | |||
361 | /* | ||
362 | * Called by sched_setscheduler() to get the priority which will be | ||
363 | * effective after the change. | ||
364 | */ | ||
365 | int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) | ||
366 | { | 326 | { |
367 | struct task_struct *top_task = rt_mutex_get_top_task(task); | 327 | struct task_struct *pi_task = NULL; |
368 | 328 | ||
369 | if (!top_task) | 329 | lockdep_assert_held(&p->pi_lock); |
370 | return newprio; | ||
371 | 330 | ||
372 | return min(top_task->prio, newprio); | 331 | if (task_has_pi_waiters(p)) |
373 | } | 332 | pi_task = task_top_pi_waiter(p)->task; |
374 | 333 | ||
375 | /* | 334 | rt_mutex_setprio(p, pi_task); |
376 | * Adjust the priority of a task, after its pi_waiters got modified. | ||
377 | * | ||
378 | * This can be both boosting and unboosting. task->pi_lock must be held. | ||
379 | */ | ||
380 | static void __rt_mutex_adjust_prio(struct task_struct *task) | ||
381 | { | ||
382 | int prio = rt_mutex_getprio(task); | ||
383 | |||
384 | if (task->prio != prio || dl_prio(prio)) | ||
385 | rt_mutex_setprio(task, prio); | ||
386 | } | 335 | } |
387 | 336 | ||
388 | /* | 337 | /* |
@@ -742,7 +691,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
742 | */ | 691 | */ |
743 | rt_mutex_dequeue_pi(task, prerequeue_top_waiter); | 692 | rt_mutex_dequeue_pi(task, prerequeue_top_waiter); |
744 | rt_mutex_enqueue_pi(task, waiter); | 693 | rt_mutex_enqueue_pi(task, waiter); |
745 | __rt_mutex_adjust_prio(task); | 694 | rt_mutex_adjust_prio(task); |
746 | 695 | ||
747 | } else if (prerequeue_top_waiter == waiter) { | 696 | } else if (prerequeue_top_waiter == waiter) { |
748 | /* | 697 | /* |
@@ -758,7 +707,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
758 | rt_mutex_dequeue_pi(task, waiter); | 707 | rt_mutex_dequeue_pi(task, waiter); |
759 | waiter = rt_mutex_top_waiter(lock); | 708 | waiter = rt_mutex_top_waiter(lock); |
760 | rt_mutex_enqueue_pi(task, waiter); | 709 | rt_mutex_enqueue_pi(task, waiter); |
761 | __rt_mutex_adjust_prio(task); | 710 | rt_mutex_adjust_prio(task); |
762 | } else { | 711 | } else { |
763 | /* | 712 | /* |
764 | * Nothing changed. No need to do any priority | 713 | * Nothing changed. No need to do any priority |
@@ -966,7 +915,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
966 | return -EDEADLK; | 915 | return -EDEADLK; |
967 | 916 | ||
968 | raw_spin_lock(&task->pi_lock); | 917 | raw_spin_lock(&task->pi_lock); |
969 | __rt_mutex_adjust_prio(task); | 918 | rt_mutex_adjust_prio(task); |
970 | waiter->task = task; | 919 | waiter->task = task; |
971 | waiter->lock = lock; | 920 | waiter->lock = lock; |
972 | waiter->prio = task->prio; | 921 | waiter->prio = task->prio; |
@@ -988,7 +937,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
988 | rt_mutex_dequeue_pi(owner, top_waiter); | 937 | rt_mutex_dequeue_pi(owner, top_waiter); |
989 | rt_mutex_enqueue_pi(owner, waiter); | 938 | rt_mutex_enqueue_pi(owner, waiter); |
990 | 939 | ||
991 | __rt_mutex_adjust_prio(owner); | 940 | rt_mutex_adjust_prio(owner); |
992 | if (owner->pi_blocked_on) | 941 | if (owner->pi_blocked_on) |
993 | chain_walk = 1; | 942 | chain_walk = 1; |
994 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | 943 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { |
@@ -1040,13 +989,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |||
1040 | waiter = rt_mutex_top_waiter(lock); | 989 | waiter = rt_mutex_top_waiter(lock); |
1041 | 990 | ||
1042 | /* | 991 | /* |
1043 | * Remove it from current->pi_waiters. We do not adjust a | 992 | * Remove it from current->pi_waiters and deboost. |
1044 | * possible priority boost right now. We execute wakeup in the | 993 | * |
1045 | * boosted mode and go back to normal after releasing | 994 | * We must in fact deboost here in order to ensure we call |
1046 | * lock->wait_lock. | 995 | * rt_mutex_setprio() to update p->pi_top_task before the |
996 | * task unblocks. | ||
1047 | */ | 997 | */ |
1048 | rt_mutex_dequeue_pi(current, waiter); | 998 | rt_mutex_dequeue_pi(current, waiter); |
1049 | __rt_mutex_adjust_prio(current); | 999 | rt_mutex_adjust_prio(current); |
1050 | 1000 | ||
1051 | /* | 1001 | /* |
1052 | * As we are waking up the top waiter, and the waiter stays | 1002 | * As we are waking up the top waiter, and the waiter stays |
@@ -1058,9 +1008,19 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |||
1058 | */ | 1008 | */ |
1059 | lock->owner = (void *) RT_MUTEX_HAS_WAITERS; | 1009 | lock->owner = (void *) RT_MUTEX_HAS_WAITERS; |
1060 | 1010 | ||
1061 | raw_spin_unlock(¤t->pi_lock); | 1011 | /* |
1062 | 1012 | * We deboosted before waking the top waiter task such that we don't | |
1013 | * run two tasks with the 'same' priority (and ensure the | ||
1014 | * p->pi_top_task pointer points to a blocked task). This however can | ||
1015 | * lead to priority inversion if we would get preempted after the | ||
1016 | * deboost but before waking our donor task, hence the preempt_disable() | ||
1017 | * before unlock. | ||
1018 | * | ||
1019 | * Pairs with preempt_enable() in rt_mutex_postunlock(); | ||
1020 | */ | ||
1021 | preempt_disable(); | ||
1063 | wake_q_add(wake_q, waiter->task); | 1022 | wake_q_add(wake_q, waiter->task); |
1023 | raw_spin_unlock(¤t->pi_lock); | ||
1064 | } | 1024 | } |
1065 | 1025 | ||
1066 | /* | 1026 | /* |
@@ -1095,7 +1055,7 @@ static void remove_waiter(struct rt_mutex *lock, | |||
1095 | if (rt_mutex_has_waiters(lock)) | 1055 | if (rt_mutex_has_waiters(lock)) |
1096 | rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); | 1056 | rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); |
1097 | 1057 | ||
1098 | __rt_mutex_adjust_prio(owner); | 1058 | rt_mutex_adjust_prio(owner); |
1099 | 1059 | ||
1100 | /* Store the lock on which owner is blocked or NULL */ | 1060 | /* Store the lock on which owner is blocked or NULL */ |
1101 | next_lock = task_blocked_on_lock(owner); | 1061 | next_lock = task_blocked_on_lock(owner); |
@@ -1134,8 +1094,7 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |||
1134 | raw_spin_lock_irqsave(&task->pi_lock, flags); | 1094 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
1135 | 1095 | ||
1136 | waiter = task->pi_blocked_on; | 1096 | waiter = task->pi_blocked_on; |
1137 | if (!waiter || (waiter->prio == task->prio && | 1097 | if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) { |
1138 | !dl_prio(task->prio))) { | ||
1139 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | 1098 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
1140 | return; | 1099 | return; |
1141 | } | 1100 | } |
@@ -1389,17 +1348,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |||
1389 | * Queue the next waiter for wakeup once we release the wait_lock. | 1348 | * Queue the next waiter for wakeup once we release the wait_lock. |
1390 | */ | 1349 | */ |
1391 | mark_wakeup_next_waiter(wake_q, lock); | 1350 | mark_wakeup_next_waiter(wake_q, lock); |
1392 | |||
1393 | /* | ||
1394 | * We should deboost before waking the top waiter task such that | ||
1395 | * we don't run two tasks with the 'same' priority. This however | ||
1396 | * can lead to prio-inversion if we would get preempted after | ||
1397 | * the deboost but before waking our high-prio task, hence the | ||
1398 | * preempt_disable before unlock. Pairs with preempt_enable() in | ||
1399 | * rt_mutex_postunlock(); | ||
1400 | */ | ||
1401 | preempt_disable(); | ||
1402 | |||
1403 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | 1351 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
1404 | 1352 | ||
1405 | return true; /* call rt_mutex_postunlock() */ | 1353 | return true; /* call rt_mutex_postunlock() */ |