aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/locking/rtmutex.c
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2017-03-23 10:56:11 -0400
committerThomas Gleixner <tglx@linutronix.de>2017-04-04 05:44:06 -0400
commitacd58620e415aee4a43a808d7d2fd87259ee0001 (patch)
treeb0971a53edac32523a6b99b4bd5f15200041634e /kernel/locking/rtmutex.c
parentaa2bfe55366552cb7e93e8709d66e698d79ccc47 (diff)
sched/rtmutex: Refactor rt_mutex_setprio()
With the introduction of SCHED_DEADLINE the whole notion that priority is a single number is gone, therefore the @prio argument to rt_mutex_setprio() doesn't make sense anymore. So rework the code to pass a pi_task instead. Note this also fixes a problem with pi_top_task caching; previously we would not set the pointer (call rt_mutex_update_top_task) if the priority didn't change, which could lead to a stale pointer. As for the XXX, I think it's fine to use pi_task->prio, because if it differs from waiter->prio, a PI chain update is imminent. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: bristot@redhat.com Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/locking/rtmutex.c')
-rw-r--r--kernel/locking/rtmutex.c112
1 files changed, 30 insertions, 82 deletions
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 4b1015ef0dc7..00b49cdbb4e0 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -322,67 +322,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
322 RB_CLEAR_NODE(&waiter->pi_tree_entry); 322 RB_CLEAR_NODE(&waiter->pi_tree_entry);
323} 323}
324 324
325/* 325static void rt_mutex_adjust_prio(struct task_struct *p)
326 * Must hold both p->pi_lock and task_rq(p)->lock.
327 */
328void rt_mutex_update_top_task(struct task_struct *p)
329{
330 if (!task_has_pi_waiters(p)) {
331 p->pi_top_task = NULL;
332 return;
333 }
334
335 p->pi_top_task = task_top_pi_waiter(p)->task;
336}
337
338/*
339 * Calculate task priority from the waiter tree priority
340 *
341 * Return task->normal_prio when the waiter tree is empty or when
342 * the waiter is not allowed to do priority boosting
343 */
344int rt_mutex_getprio(struct task_struct *task)
345{
346 if (likely(!task_has_pi_waiters(task)))
347 return task->normal_prio;
348
349 return min(task_top_pi_waiter(task)->prio,
350 task->normal_prio);
351}
352
353/*
354 * Must hold either p->pi_lock or task_rq(p)->lock.
355 */
356struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
357{
358 return task->pi_top_task;
359}
360
361/*
362 * Called by sched_setscheduler() to get the priority which will be
363 * effective after the change.
364 */
365int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
366{ 326{
367 struct task_struct *top_task = rt_mutex_get_top_task(task); 327 struct task_struct *pi_task = NULL;
368 328
369 if (!top_task) 329 lockdep_assert_held(&p->pi_lock);
370 return newprio;
371 330
372 return min(top_task->prio, newprio); 331 if (task_has_pi_waiters(p))
373} 332 pi_task = task_top_pi_waiter(p)->task;
374 333
375/* 334 rt_mutex_setprio(p, pi_task);
376 * Adjust the priority of a task, after its pi_waiters got modified.
377 *
378 * This can be both boosting and unboosting. task->pi_lock must be held.
379 */
380static void __rt_mutex_adjust_prio(struct task_struct *task)
381{
382 int prio = rt_mutex_getprio(task);
383
384 if (task->prio != prio || dl_prio(prio))
385 rt_mutex_setprio(task, prio);
386} 335}
387 336
388/* 337/*
@@ -742,7 +691,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
742 */ 691 */
743 rt_mutex_dequeue_pi(task, prerequeue_top_waiter); 692 rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
744 rt_mutex_enqueue_pi(task, waiter); 693 rt_mutex_enqueue_pi(task, waiter);
745 __rt_mutex_adjust_prio(task); 694 rt_mutex_adjust_prio(task);
746 695
747 } else if (prerequeue_top_waiter == waiter) { 696 } else if (prerequeue_top_waiter == waiter) {
748 /* 697 /*
@@ -758,7 +707,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
758 rt_mutex_dequeue_pi(task, waiter); 707 rt_mutex_dequeue_pi(task, waiter);
759 waiter = rt_mutex_top_waiter(lock); 708 waiter = rt_mutex_top_waiter(lock);
760 rt_mutex_enqueue_pi(task, waiter); 709 rt_mutex_enqueue_pi(task, waiter);
761 __rt_mutex_adjust_prio(task); 710 rt_mutex_adjust_prio(task);
762 } else { 711 } else {
763 /* 712 /*
764 * Nothing changed. No need to do any priority 713 * Nothing changed. No need to do any priority
@@ -966,7 +915,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
966 return -EDEADLK; 915 return -EDEADLK;
967 916
968 raw_spin_lock(&task->pi_lock); 917 raw_spin_lock(&task->pi_lock);
969 __rt_mutex_adjust_prio(task); 918 rt_mutex_adjust_prio(task);
970 waiter->task = task; 919 waiter->task = task;
971 waiter->lock = lock; 920 waiter->lock = lock;
972 waiter->prio = task->prio; 921 waiter->prio = task->prio;
@@ -988,7 +937,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
988 rt_mutex_dequeue_pi(owner, top_waiter); 937 rt_mutex_dequeue_pi(owner, top_waiter);
989 rt_mutex_enqueue_pi(owner, waiter); 938 rt_mutex_enqueue_pi(owner, waiter);
990 939
991 __rt_mutex_adjust_prio(owner); 940 rt_mutex_adjust_prio(owner);
992 if (owner->pi_blocked_on) 941 if (owner->pi_blocked_on)
993 chain_walk = 1; 942 chain_walk = 1;
994 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { 943 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1040,13 +989,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
1040 waiter = rt_mutex_top_waiter(lock); 989 waiter = rt_mutex_top_waiter(lock);
1041 990
1042 /* 991 /*
1043 * Remove it from current->pi_waiters. We do not adjust a 992 * Remove it from current->pi_waiters and deboost.
1044 * possible priority boost right now. We execute wakeup in the 993 *
1045 * boosted mode and go back to normal after releasing 994 * We must in fact deboost here in order to ensure we call
1046 * lock->wait_lock. 995 * rt_mutex_setprio() to update p->pi_top_task before the
996 * task unblocks.
1047 */ 997 */
1048 rt_mutex_dequeue_pi(current, waiter); 998 rt_mutex_dequeue_pi(current, waiter);
1049 __rt_mutex_adjust_prio(current); 999 rt_mutex_adjust_prio(current);
1050 1000
1051 /* 1001 /*
1052 * As we are waking up the top waiter, and the waiter stays 1002 * As we are waking up the top waiter, and the waiter stays
@@ -1058,9 +1008,19 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
1058 */ 1008 */
1059 lock->owner = (void *) RT_MUTEX_HAS_WAITERS; 1009 lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1060 1010
1061 raw_spin_unlock(&current->pi_lock); 1011 /*
1062 1012 * We deboosted before waking the top waiter task such that we don't
1013 * run two tasks with the 'same' priority (and ensure the
1014 * p->pi_top_task pointer points to a blocked task). This however can
1015 * lead to priority inversion if we would get preempted after the
1016 * deboost but before waking our donor task, hence the preempt_disable()
1017 * before unlock.
1018 *
1019 * Pairs with preempt_enable() in rt_mutex_postunlock();
1020 */
1021 preempt_disable();
1063 wake_q_add(wake_q, waiter->task); 1022 wake_q_add(wake_q, waiter->task);
1023 raw_spin_unlock(&current->pi_lock);
1064} 1024}
1065 1025
1066/* 1026/*
@@ -1095,7 +1055,7 @@ static void remove_waiter(struct rt_mutex *lock,
1095 if (rt_mutex_has_waiters(lock)) 1055 if (rt_mutex_has_waiters(lock))
1096 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); 1056 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1097 1057
1098 __rt_mutex_adjust_prio(owner); 1058 rt_mutex_adjust_prio(owner);
1099 1059
1100 /* Store the lock on which owner is blocked or NULL */ 1060 /* Store the lock on which owner is blocked or NULL */
1101 next_lock = task_blocked_on_lock(owner); 1061 next_lock = task_blocked_on_lock(owner);
@@ -1134,8 +1094,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
1134 raw_spin_lock_irqsave(&task->pi_lock, flags); 1094 raw_spin_lock_irqsave(&task->pi_lock, flags);
1135 1095
1136 waiter = task->pi_blocked_on; 1096 waiter = task->pi_blocked_on;
1137 if (!waiter || (waiter->prio == task->prio && 1097 if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) {
1138 !dl_prio(task->prio))) {
1139 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 1098 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
1140 return; 1099 return;
1141 } 1100 }
@@ -1389,17 +1348,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
1389 * Queue the next waiter for wakeup once we release the wait_lock. 1348 * Queue the next waiter for wakeup once we release the wait_lock.
1390 */ 1349 */
1391 mark_wakeup_next_waiter(wake_q, lock); 1350 mark_wakeup_next_waiter(wake_q, lock);
1392
1393 /*
1394 * We should deboost before waking the top waiter task such that
1395 * we don't run two tasks with the 'same' priority. This however
1396 * can lead to prio-inversion if we would get preempted after
1397 * the deboost but before waking our high-prio task, hence the
1398 * preempt_disable before unlock. Pairs with preempt_enable() in
1399 * rt_mutex_postunlock();
1400 */
1401 preempt_disable();
1402
1403 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1351 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1404 1352
1405 return true; /* call rt_mutex_postunlock() */ 1353 return true; /* call rt_mutex_postunlock() */