2 files changed, 66 insertions, 3 deletions
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index e364b424b019..79d2d765a75f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
        if (!hold_ctx)
                return 0;
-        if (unlikely(ctx == hold_ctx))
-                return -EALREADY;
        if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
            (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        unsigned long flags;
        int ret;
+        if (use_ww_ctx) {
+                struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+                if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+                        return -EALREADY;
+        }
        preempt_disable();
        mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e32ae1..5fc8c311b8fe 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath       native_queued_spin_lock_slowpath
 #endif
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():     *,1,0 -> *,0,1
+ *
+ *  (2) set_locked():                   t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():       t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+        u32 val; /* snapshot of lock->val taken on each poll */
+        for (;;) { /* phase 1: poll until the word is zero or the locked byte is set */
+                val = atomic_read(&lock->val);
+                if (!val) /* whole word is zero: not locked, we're done */
+                        goto done;
+                if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+                        break;
+                /* non-zero but locked byte clear (pending/tail state): wait until we observe the lock */
+                cpu_relax();
+        }
+        /* phase 2: any unlock is good; spin until the locked byte reads clear */
+        while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+                cpu_relax();
+done:
+        smp_rmb(); /* CTRL (dependency on the loads above) + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 /**

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index e364b424b019..79d2d765a75f 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
486	if (!hold_ctx)	486	if (!hold_ctx)
487	return 0;	487	return 0;
488		488
489	if (unlikely(ctx == hold_ctx))
490	return -EALREADY;
491
492	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&	489	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
493	(ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {	490	(ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
494	#ifdef CONFIG_DEBUG_MUTEXES	491	#ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
514	unsigned long flags;	511	unsigned long flags;
515	int ret;	512	int ret;
516		513
		514	if (use_ww_ctx) {
		515	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
		516	if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
		517	return -EALREADY;
		518	}
		519
517	preempt_disable();	520	preempt_disable();
518	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);	521	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
519		522


diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ce2f75e32ae1..5fc8c311b8fe 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
267	#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath	267	#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
268	#endif	268	#endif
269		269
		270	/*
		271	* queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
		272	* issuing an _unordered_ store to set _Q_LOCKED_VAL.
		273	*
		274	* This means that the store can be delayed, but no later than the
		275	* store-release from the unlock. This means that simply observing
		276	* _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
		277	*
		278	* There are two paths that can issue the unordered store:
		279	*
		280	* (1) clear_pending_set_locked(): *,1,0 -> *,0,1
		281	*
		282	* (2) set_locked(): t,0,0 -> t,0,1 ; t != 0
		283	* atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1
		284	*
		285	* However, in both cases we have other !0 state we've set before to queue
		286	* ourselves:
		287	*
		288	* For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
		289	* load is constrained by that ACQUIRE to not pass before that, and thus must
		290	* observe the store.
		291	*
		292	* For (2) we have a more interesting scenario. We enqueue ourselves using
		293	* xchg_tail(), which ends up being a RELEASE. This in itself is not
		294	* sufficient, however that is followed by an smp_cond_acquire() on the same
		295	* word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
		296	* guarantees we must observe that store.
		297	*
		298	* Therefore both cases have other !0 state that is observable before the
		299	* unordered locked byte store comes through. This means we can use that to
		300	* wait for the lock store, and then wait for an unlock.
		301	*/
		302	#ifndef queued_spin_unlock_wait
		303	void queued_spin_unlock_wait(struct qspinlock *lock)
		304	{
		305	u32 val;
		306
		307	for (;;) {
		308	val = atomic_read(&lock->val);
		309
		310	if (!val) /* not locked, we're done */
		311	goto done;
		312
		313	if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
		314	break;
		315
		316	/* not locked, but pending, wait until we observe the lock */
		317	cpu_relax();
		318	}
		319
		320	/* any unlock is good */
		321	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
		322	cpu_relax();
		323
		324	done:
		325	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
		326	}
		327	EXPORT_SYMBOL(queued_spin_unlock_wait);
		328	#endif
		329
270	#endif /* _GEN_PV_LOCK_SLOWPATH */	330	#endif /* _GEN_PV_LOCK_SLOWPATH */
271		331
272	/**	332	/**