aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Sebastian Andrzej Siewior <bigeasy@linutronix.de> 2015-06-17 04:33:50 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2015-06-19 15:26:38 -0400
commit 802ab58da74bb49ab348d2872190ef26ddc1a3e0 (patch)
tree c62e2f4927ba283ddd5a8fd493c57fa7b9b2dbaf /kernel
parent 45ab4effc3bee6f8a5cb05652b7bb895ec5b6a7a (diff)
futex: Lower the lock contention on the HB lock during wake up
wake_futex_pi() wakes the task before releasing the hash bucket lock (HB). The first thing the woken up task usually does is to acquire the lock which requires the HB lock. On SMP Systems this leads to blocking on the HB lock which is released by the owner shortly after.

This patch rearranges the unlock path by first releasing the HB lock and then waking up the task.

[ tglx: Fixed up the rtmutex unlock path ]

Originally-from: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: http://lkml.kernel.org/r/20150617083350.GA2433@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/futex.c 32
-rw-r--r-- kernel/locking/rtmutex.c 56
-rw-r--r-- kernel/locking/rtmutex_common.h 3
3 files changed, 73 insertions, 18 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index f9984c363e9a..a0cf6fa953de 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1117,11 +1117,14 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
1117 q->lock_ptr = NULL; 1117 q->lock_ptr = NULL;
1118} 1118}
1119 1119
1120static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) 1120static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
1121 struct futex_hash_bucket *hb)
1121{ 1122{
1122 struct task_struct *new_owner; 1123 struct task_struct *new_owner;
1123 struct futex_pi_state *pi_state = this->pi_state; 1124 struct futex_pi_state *pi_state = this->pi_state;
1124 u32 uninitialized_var(curval), newval; 1125 u32 uninitialized_var(curval), newval;
1126 WAKE_Q(wake_q);
1127 bool deboost;
1125 int ret = 0; 1128 int ret = 0;
1126 1129
1127 if (!pi_state) 1130 if (!pi_state)
@@ -1173,7 +1176,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
1173 raw_spin_unlock_irq(&new_owner->pi_lock); 1176 raw_spin_unlock_irq(&new_owner->pi_lock);
1174 1177
1175 raw_spin_unlock(&pi_state->pi_mutex.wait_lock); 1178 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
1176 rt_mutex_unlock(&pi_state->pi_mutex); 1179
1180 deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
1181
1182 /*
1183 * First unlock HB so the waiter does not spin on it once he got woken
1184 * up. Second wake up the waiter before the priority is adjusted. If we
1185 * deboost first (and lose our higher priority), then the task might get
1186 * scheduled away before the wake up can take place.
1187 */
1188 spin_unlock(&hb->lock);
1189 wake_up_q(&wake_q);
1190 if (deboost)
1191 rt_mutex_adjust_prio(current);
1177 1192
1178 return 0; 1193 return 0;
1179} 1194}
@@ -2413,13 +2428,23 @@ retry:
2413 */ 2428 */
2414 match = futex_top_waiter(hb, &key); 2429 match = futex_top_waiter(hb, &key);
2415 if (match) { 2430 if (match) {
2416 ret = wake_futex_pi(uaddr, uval, match); 2431 ret = wake_futex_pi(uaddr, uval, match, hb);
2432 /*
2433 * In case of success wake_futex_pi dropped the hash
2434 * bucket lock.
2435 */
2436 if (!ret)
2437 goto out_putkey;
2417 /* 2438 /*
2418 * The atomic access to the futex value generated a 2439 * The atomic access to the futex value generated a
2419 * pagefault, so retry the user-access and the wakeup: 2440 * pagefault, so retry the user-access and the wakeup:
2420 */ 2441 */
2421 if (ret == -EFAULT) 2442 if (ret == -EFAULT)
2422 goto pi_faulted; 2443 goto pi_faulted;
2444 /*
2445 * wake_futex_pi has detected invalid state. Tell user
2446 * space.
2447 */
2423 goto out_unlock; 2448 goto out_unlock;
2424 } 2449 }
2425 2450
@@ -2440,6 +2465,7 @@ retry:
2440 2465
2441out_unlock: 2466out_unlock:
2442 spin_unlock(&hb->lock); 2467 spin_unlock(&hb->lock);
2468out_putkey:
2443 put_futex_key(&key); 2469 put_futex_key(&key);
2444 return ret; 2470 return ret;
2445 2471
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 44ee8f85a78b..0add7248c8bf 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -300,7 +300,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
300 * of task. We do not use the spin_xx_mutex() variants here as we are 300 * of task. We do not use the spin_xx_mutex() variants here as we are
301 * outside of the debug path.) 301 * outside of the debug path.)
302 */ 302 */
303static void rt_mutex_adjust_prio(struct task_struct *task) 303void rt_mutex_adjust_prio(struct task_struct *task)
304{ 304{
305 unsigned long flags; 305 unsigned long flags;
306 306
@@ -1247,13 +1247,12 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
1247} 1247}
1248 1248
1249/* 1249/*
1250 * Slow path to release a rt-mutex: 1250 * Slow path to release a rt-mutex.
1251 * Return whether the current task needs to undo a potential priority boosting.
1251 */ 1252 */
1252static void __sched 1253static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
1253rt_mutex_slowunlock(struct rt_mutex *lock) 1254 struct wake_q_head *wake_q)
1254{ 1255{
1255 WAKE_Q(wake_q);
1256
1257 raw_spin_lock(&lock->wait_lock); 1256 raw_spin_lock(&lock->wait_lock);
1258 1257
1259 debug_rt_mutex_unlock(lock); 1258 debug_rt_mutex_unlock(lock);
@@ -1294,7 +1293,7 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
1294 while (!rt_mutex_has_waiters(lock)) { 1293 while (!rt_mutex_has_waiters(lock)) {
1295 /* Drops lock->wait_lock ! */ 1294 /* Drops lock->wait_lock ! */
1296 if (unlock_rt_mutex_safe(lock) == true) 1295 if (unlock_rt_mutex_safe(lock) == true)
1297 return; 1296 return false;
1298 /* Relock the rtmutex and try again */ 1297 /* Relock the rtmutex and try again */
1299 raw_spin_lock(&lock->wait_lock); 1298 raw_spin_lock(&lock->wait_lock);
1300 } 1299 }
@@ -1305,13 +1304,12 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
1305 * 1304 *
1306 * Queue the next waiter for wakeup once we release the wait_lock. 1305 * Queue the next waiter for wakeup once we release the wait_lock.
1307 */ 1306 */
1308 mark_wakeup_next_waiter(&wake_q, lock); 1307 mark_wakeup_next_waiter(wake_q, lock);
1309 1308
1310 raw_spin_unlock(&lock->wait_lock); 1309 raw_spin_unlock(&lock->wait_lock);
1311 wake_up_q(&wake_q);
1312 1310
1313 /* Undo pi boosting if necessary: */ 1311 /* check PI boosting */
1314 rt_mutex_adjust_prio(current); 1312 return true;
1315} 1313}
1316 1314
1317/* 1315/*
@@ -1362,12 +1360,23 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
1362 1360
1363static inline void 1361static inline void
1364rt_mutex_fastunlock(struct rt_mutex *lock, 1362rt_mutex_fastunlock(struct rt_mutex *lock,
1365 void (*slowfn)(struct rt_mutex *lock)) 1363 bool (*slowfn)(struct rt_mutex *lock,
1364 struct wake_q_head *wqh))
1366{ 1365{
1367 if (likely(rt_mutex_cmpxchg(lock, current, NULL))) 1366 WAKE_Q(wake_q);
1367
1368 if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
1368 rt_mutex_deadlock_account_unlock(current); 1369 rt_mutex_deadlock_account_unlock(current);
1369 else 1370
1370 slowfn(lock); 1371 } else {
1372 bool deboost = slowfn(lock, &wake_q);
1373
1374 wake_up_q(&wake_q);
1375
1376 /* Undo pi boosting if necessary: */
1377 if (deboost)
1378 rt_mutex_adjust_prio(current);
1379 }
1371} 1380}
1372 1381
1373/** 1382/**
@@ -1462,6 +1471,23 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
1462EXPORT_SYMBOL_GPL(rt_mutex_unlock); 1471EXPORT_SYMBOL_GPL(rt_mutex_unlock);
1463 1472
1464/** 1473/**
1474 * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
1475 * @lock: the rt_mutex to be unlocked
1476 *
1477 * Returns: true/false indicating whether priority adjustment is
1478 * required or not.
1479 */
1480bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
1481 struct wake_q_head *wqh)
1482{
1483 if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
1484 rt_mutex_deadlock_account_unlock(current);
1485 return false;
1486 }
1487 return rt_mutex_slowunlock(lock, wqh);
1488}
1489
1490/**
1465 * rt_mutex_destroy - mark a mutex unusable 1491 * rt_mutex_destroy - mark a mutex unusable
1466 * @lock: the mutex to be destroyed 1492 * @lock: the mutex to be destroyed
1467 * 1493 *
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 855212501407..7844f8f0e639 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -131,6 +131,9 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
131 struct hrtimer_sleeper *to, 131 struct hrtimer_sleeper *to,
132 struct rt_mutex_waiter *waiter); 132 struct rt_mutex_waiter *waiter);
133extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); 133extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
134extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
135 struct wake_q_head *wqh);
136extern void rt_mutex_adjust_prio(struct task_struct *task);
134 137
135#ifdef CONFIG_DEBUG_RT_MUTEXES 138#ifdef CONFIG_DEBUG_RT_MUTEXES
136# include "rtmutex-debug.h" 139# include "rtmutex-debug.h"