path: root/kernel/locking
author		Xunlei Pang <xlpang@redhat.com>	2017-03-23 10:56:07 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-04-04 05:44:05 -0400
commit		2a1c6029940675abb2217b590512dbf691867ec4 (patch)
tree		a975e4e9f643d86a904b6314a3bab21b023fd6cc /kernel/locking
parent		38bffdac071b720db627bfd2b125a2802a04d419 (diff)
rtmutex: Deboost before waking up the top waiter
We should deboost before waking the high-priority task, such that we
don't run two tasks with the same "state" (priority, deadline,
sched_class, etc.).

In order to make sure the boosting task doesn't start running between
unlock and deboost (due to a 'spurious' wakeup), we move the deboost
under the wait_lock; that way it's serialized against the wait loop in
__rt_mutex_slowlock().

Doing the deboost early can however lead to priority inversion if
current were to get preempted after the deboost but before waking our
high-prio task, hence we disable preemption before doing the deboost
and enable it again after the wake-up is over.

This gets us the right semantic order, but most importantly this
change ensures pointer stability for the next patch, where we have
rt_mutex_setprio() cache a pointer to the top-most waiter task. If we,
as before this change, did the wakeup first and then deboosted, this
pointer might point into thin air.

[peterz: Changelog + patch munging]
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170323150216.110065320@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
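In short: the deboost of current now happens under lock->wait_lock (inside mark_wakeup_next_waiter()), and preemption stays disabled across the unlock until the top waiter has actually been woken. A condensed sketch of the resulting unlock ordering, pieced together from the hunks below (abridged, not a compilable excerpt):

	/* rt_mutex_slowunlock(), abridged */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	...
	mark_wakeup_next_waiter(wake_q, lock);	/* dequeues and deboosts current under wait_lock */
	preempt_disable();			/* keep current running until the wakeup is done */
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;				/* caller must run rt_mutex_postunlock() */

	/* rt_mutex_postunlock() */
	wake_up_q(wake_q);			/* wake the already-deboosted top waiter */
	if (deboost)
		preempt_enable();		/* pairs with the preempt_disable() above */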
Diffstat (limited to 'kernel/locking')
-rw-r--r--	kernel/locking/rtmutex.c	59
-rw-r--r--	kernel/locking/rtmutex_common.h	2
2 files changed, 33 insertions(+), 28 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index dd103124166b..71ecf0624410 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -373,24 +373,6 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
 }
 
 /*
- * Adjust task priority (undo boosting). Called from the exit path of
- * rt_mutex_slowunlock() and rt_mutex_slowlock().
- *
- * (Note: We do this outside of the protection of lock->wait_lock to
- * allow the lock to be taken while or before we readjust the priority
- * of task. We do not use the spin_xx_mutex() variants here as we are
- * outside of the debug path.)
- */
-void rt_mutex_adjust_prio(struct task_struct *task)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	__rt_mutex_adjust_prio(task);
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-}
-
-/*
  * Deadlock detection is conditional:
  *
  * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
@@ -1051,6 +1033,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 	 * lock->wait_lock.
 	 */
 	rt_mutex_dequeue_pi(current, waiter);
+	__rt_mutex_adjust_prio(current);
 
 	/*
 	 * As we are waking up the top waiter, and the waiter stays
@@ -1393,6 +1376,16 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
 	 */
 	mark_wakeup_next_waiter(wake_q, lock);
 
+	/*
+	 * We should deboost before waking the top waiter task such that
+	 * we don't run two tasks with the 'same' priority. This however
+	 * can lead to prio-inversion if we would get preempted after
+	 * the deboost but before waking our high-prio task, hence the
+	 * preempt_disable before unlock. Pairs with preempt_enable() in
+	 * rt_mutex_postunlock();
+	 */
+	preempt_disable();
+
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	/* check PI boosting */
@@ -1442,6 +1435,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
 	return slowfn(lock);
 }
 
+/*
+ * Undo pi boosting (if necessary) and wake top waiter.
+ */
+void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
+{
+	wake_up_q(wake_q);
+
+	/* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+	if (deboost)
+		preempt_enable();
+}
+
 static inline void
 rt_mutex_fastunlock(struct rt_mutex *lock,
 		    bool (*slowfn)(struct rt_mutex *lock,
@@ -1455,11 +1460,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 
 	deboost = slowfn(lock, &wake_q);
 
-	wake_up_q(&wake_q);
-
-	/* Undo pi boosting if necessary: */
-	if (deboost)
-		rt_mutex_adjust_prio(current);
+	rt_mutex_postunlock(&wake_q, deboost);
 }
 
 /**
@@ -1572,6 +1573,13 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
 	}
 
 	mark_wakeup_next_waiter(wake_q, lock);
+	/*
+	 * We've already deboosted, retain preempt_disabled when dropping
+	 * the wait_lock to avoid inversion until the wakeup. Matched
+	 * by rt_mutex_postunlock();
+	 */
+	preempt_disable();
+
 	return true; /* deboost and wakeups */
 }
 
@@ -1584,10 +1592,7 @@ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
 	deboost = __rt_mutex_futex_unlock(lock, &wake_q);
 	raw_spin_unlock_irq(&lock->wait_lock);
 
-	if (deboost) {
-		wake_up_q(&wake_q);
-		rt_mutex_adjust_prio(current);
-	}
+	rt_mutex_postunlock(&wake_q, deboost);
 }
 
 /**
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index b1ccfea2effe..a09c02982391 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
 extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
 				 struct wake_q_head *wqh);
 
-extern void rt_mutex_adjust_prio(struct task_struct *task);
+extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost);
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # include "rtmutex-debug.h"
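With rt_mutex_adjust_prio() gone from the shared header, unlock paths hand both the wake queue and the deboost decision to rt_mutex_postunlock(). For illustration, the caller side as it looks after this patch, condensed from the rtmutex.c hunks above (the DEFINE_WAKE_Q() declaration is assumed from the surrounding kernel code of this era, it is not part of the diff shown here):

	DEFINE_WAKE_Q(wake_q);
	bool deboost;

	raw_spin_lock_irq(&lock->wait_lock);
	deboost = __rt_mutex_futex_unlock(lock, &wake_q);	/* deboosts and disables preemption */
	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_postunlock(&wake_q, deboost);			/* wake first, then re-enable preemption */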