author     Xunlei Pang <xlpang@redhat.com>       2017-03-23 10:56:07 -0400
committer  Thomas Gleixner <tglx@linutronix.de>  2017-04-04 05:44:05 -0400
commit     2a1c6029940675abb2217b590512dbf691867ec4 (patch)
tree       a975e4e9f643d86a904b6314a3bab21b023fd6cc /kernel/locking
parent     38bffdac071b720db627bfd2b125a2802a04d419 (diff)
rtmutex: Deboost before waking up the top waiter
We should deboost before waking the high-priority task, such that we
don't run two tasks with the same "state" (priority, deadline,
sched_class, etc.).

In order to make sure the boosting task doesn't start running between
unlock and deboost (due to a 'spurious' wakeup), we move the deboost
under the wait_lock; that way it is serialized against the wait loop in
__rt_mutex_slowlock().

Doing the deboost early can however lead to priority inversion if
current gets preempted after the deboost but before waking our
high-prio task, hence we disable preemption before the deboost and
re-enable it once the wakeup is done.

This gets us the right semantic order, but most importantly, it
ensures pointer stability for the next patch, where rt_mutex_setprio()
will cache a pointer to the top-most waiter task. If we, as before
this change, did the wakeup first and then deboosted, this pointer
might point into thin air.
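
Condensed, the resulting unlock path orders the operations roughly as
follows (a simplified sketch distilled from the diff below; locals,
the fast path and the no-waiter case are omitted):

    /* In rt_mutex_slowunlock(), still under lock->wait_lock: */
    mark_wakeup_next_waiter(wake_q, lock);  /* dequeue top waiter + deboost current */
    preempt_disable();                      /* stay non-preemptible while deboosted */
    raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

    /* In the caller (rt_mutex_fastunlock() / rt_mutex_futex_unlock()): */
    rt_mutex_postunlock(&wake_q, deboost);  /* wake_up_q(), then preempt_enable() */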
[peterz: Changelog + patch munging]
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170323150216.110065320@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/locking')
 -rw-r--r--  kernel/locking/rtmutex.c         59
 -rw-r--r--  kernel/locking/rtmutex_common.h   2
 2 files changed, 33 insertions, 28 deletions
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index dd103124166b..71ecf0624410 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -373,24 +373,6 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
 }
 
 /*
- * Adjust task priority (undo boosting). Called from the exit path of
- * rt_mutex_slowunlock() and rt_mutex_slowlock().
- *
- * (Note: We do this outside of the protection of lock->wait_lock to
- * allow the lock to be taken while or before we readjust the priority
- * of task. We do not use the spin_xx_mutex() variants here as we are
- * outside of the debug path.)
- */
-void rt_mutex_adjust_prio(struct task_struct *task)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	__rt_mutex_adjust_prio(task);
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-}
-
-/*
  * Deadlock detection is conditional:
  *
  * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
@@ -1051,6 +1033,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 	 * lock->wait_lock.
 	 */
 	rt_mutex_dequeue_pi(current, waiter);
+	__rt_mutex_adjust_prio(current);
 
 	/*
 	 * As we are waking up the top waiter, and the waiter stays
@@ -1393,6 +1376,16 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
 	 */
 	mark_wakeup_next_waiter(wake_q, lock);
 
+	/*
+	 * We should deboost before waking the top waiter task such that
+	 * we don't run two tasks with the 'same' priority. This however
+	 * can lead to prio-inversion if we would get preempted after
+	 * the deboost but before waking our high-prio task, hence the
+	 * preempt_disable before unlock. Pairs with preempt_enable() in
+	 * rt_mutex_postunlock();
+	 */
+	preempt_disable();
+
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	/* check PI boosting */
@@ -1442,6 +1435,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
 	return slowfn(lock);
 }
 
+/*
+ * Undo pi boosting (if necessary) and wake top waiter.
+ */
+void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
+{
+	wake_up_q(wake_q);
+
+	/* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+	if (deboost)
+		preempt_enable();
+}
+
 static inline void
 rt_mutex_fastunlock(struct rt_mutex *lock,
 		    bool (*slowfn)(struct rt_mutex *lock,
@@ -1455,11 +1460,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 
 	deboost = slowfn(lock, &wake_q);
 
-	wake_up_q(&wake_q);
-
-	/* Undo pi boosting if necessary: */
-	if (deboost)
-		rt_mutex_adjust_prio(current);
+	rt_mutex_postunlock(&wake_q, deboost);
 }
 
 /**
@@ -1572,6 +1573,13 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
 	}
 
 	mark_wakeup_next_waiter(wake_q, lock);
+	/*
+	 * We've already deboosted, retain preempt_disabled when dropping
+	 * the wait_lock to avoid inversion until the wakeup. Matched
+	 * by rt_mutex_postunlock();
+	 */
+	preempt_disable();
+
 	return true; /* deboost and wakeups */
 }
 
@@ -1584,10 +1592,7 @@ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
 	deboost = __rt_mutex_futex_unlock(lock, &wake_q);
 	raw_spin_unlock_irq(&lock->wait_lock);
 
-	if (deboost) {
-		wake_up_q(&wake_q);
-		rt_mutex_adjust_prio(current);
-	}
+	rt_mutex_postunlock(&wake_q, deboost);
 }
 
 /**
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index b1ccfea2effe..a09c02982391 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
 extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
 				    struct wake_q_head *wqh);
 
-extern void rt_mutex_adjust_prio(struct task_struct *task);
+extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost);
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # include "rtmutex-debug.h"