Diffstat (limited to 'kernel/locking/qspinlock.c')
-rw-r--r--	kernel/locking/qspinlock.c	117
1 file changed, 0 insertions, 117 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index fd24153e8a48..294294c71ba4 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -268,123 +268,6 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
-/*
- * Various notes on spin_is_locked() and spin_unlock_wait(), which are
- * 'interesting' functions:
- *
- * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
- * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
- * PPC). Also qspinlock has a similar issue per construction, the setting of
- * the locked byte can be unordered acquiring the lock proper.
- *
- * This gets to be 'interesting' in the following cases, where the /should/s
- * end up false because of this issue.
- *
- *
- * CASE 1:
- *
- * So the spin_is_locked() correctness issue comes from something like:
- *
- *   CPU0				CPU1
- *
- *   global_lock();			local_lock(i)
- *     spin_lock(&G)			  spin_lock(&L[i])
- *     for (i)				  if (!spin_is_locked(&G)) {
- *       spin_unlock_wait(&L[i]);	    smp_acquire__after_ctrl_dep();
- *					    return;
- *					  }
- *					  // deal with fail
- *
- * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such
- * that there is exclusion between the two critical sections.
- *
- * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
- * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
- * /should/ be constrained by the ACQUIRE from spin_lock(&G).
- *
- * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
- *
- *
- * CASE 2:
- *
- * For spin_unlock_wait() there is a second correctness issue, namely:
- *
- *   CPU0				CPU1
- *
- *   flag = set;
- *   smp_mb();				spin_lock(&l)
- *   spin_unlock_wait(&l);		if (!flag)
- *					  // add to lockless list
- *					spin_unlock(&l);
- *   // iterate lockless list
- *
- * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
- * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
- * semantics etc..)
- *
- * Where flag /should/ be ordered against the locked store of l.
- */
-
-/*
- * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
- * issuing an _unordered_ store to set _Q_LOCKED_VAL.
- *
- * This means that the store can be delayed, but no later than the
- * store-release from the unlock. This means that simply observing
- * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
- *
- * There are two paths that can issue the unordered store:
- *
- *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
- *
- *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
- *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
- *
- * However, in both cases we have other !0 state we've set before to queue
- * ourseves:
- *
- * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
- * load is constrained by that ACQUIRE to not pass before that, and thus must
- * observe the store.
- *
- * For (2) we have a more intersting scenario. We enqueue ourselves using
- * xchg_tail(), which ends up being a RELEASE. This in itself is not
- * sufficient, however that is followed by an smp_cond_acquire() on the same
- * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
- * guarantees we must observe that store.
- *
- * Therefore both cases have other !0 state that is observable before the
- * unordered locked byte store comes through. This means we can use that to
- * wait for the lock store, and then wait for an unlock.
- */
-#ifndef queued_spin_unlock_wait
-void queued_spin_unlock_wait(struct qspinlock *lock)
-{
-	u32 val;
-
-	for (;;) {
-		val = atomic_read(&lock->val);
-
-		if (!val) /* not locked, we're done */
-			goto done;
-
-		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
-			break;
-
-		/* not locked, but pending, wait until we observe the lock */
-		cpu_relax();
-	}
-
-	/* any unlock is good */
-	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
-
-done:
-	smp_acquire__after_ctrl_dep();
-}
-EXPORT_SYMBOL(queued_spin_unlock_wait);
-#endif
-
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
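
For reference, below is a minimal sketch of the CASE 1 pattern described in the removed comment: a global/local locking scheme whose local fast path relies on spin_is_locked() and whose global slow path relied on the spin_unlock_wait() interface this patch removes. The names G, L[], NR_LOCAL, global_lock() and local_lock() are illustrative only, not kernel APIs, and each L[i] is assumed to have been set up with spin_lock_init().

#include <linux/errno.h>
#include <linux/spinlock.h>

#define NR_LOCAL	16			/* hypothetical number of local locks */

static DEFINE_SPINLOCK(G);			/* global lock */
static spinlock_t L[NR_LOCAL];			/* local locks, spin_lock_init()'d elsewhere */

/* CPU0: take the global lock, then wait out every current local holder. */
static void global_lock(void)
{
	int i;

	spin_lock(&G);
	for (i = 0; i < NR_LOCAL; i++)
		spin_unlock_wait(&L[i]);	/* interface removed by this patch */
}

/* CPU1: fast path takes only the local lock when the global lock is free. */
static int local_lock(int i)
{
	spin_lock(&L[i]);
	if (!spin_is_locked(&G)) {
		/* order the critical section after the spin_is_locked() load */
		smp_acquire__after_ctrl_dep();
		return 0;			/* success: caller holds L[i] */
	}

	/* deal with fail: back off so the global holder can make progress */
	spin_unlock(&L[i]);
	return -EBUSY;
}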
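
A similar sketch of the CASE 2 pattern from the same removed comment: one CPU raises a flag and then waits for any in-flight critical section on the lock before walking a lockless list, while the other CPU only adds to the list as long as the flag is not visible inside its critical section. stop_flag, lockless_list, drain_list() and maybe_add() are hypothetical names introduced here for illustration.

#include <linux/llist.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(l);
static bool stop_flag;
static LLIST_HEAD(lockless_list);

/* CPU0: stop new additions, then it is safe to walk the list. */
static void drain_list(void)
{
	WRITE_ONCE(stop_flag, true);
	smp_mb();			/* order the flag store before the lock-word loads */
	spin_unlock_wait(&l);		/* interface removed by this patch */

	/* iterate lockless list: no new entries can appear past this point */
	llist_del_all(&lockless_list);
}

/* CPU1: only add to the list while the flag has not been observed. */
static void maybe_add(struct llist_node *new_node)
{
	spin_lock(&l);
	if (!READ_ONCE(stop_flag))
		llist_add(new_node, &lockless_list);
	spin_unlock(&l);
}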
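
Finally, the second removed comment argues that some non-zero state is always visible in the lock word before the unordered store of _Q_LOCKED_VAL, which is what allowed queued_spin_unlock_wait() to first spin until the locked byte appears and then spin until it is released, with smp_acquire__after_ctrl_dep() upgrading the final load to ACQUIRE. The helper below is a hypothetical illustration of the (tail, pending, locked) states that argument distinguishes, written against the existing _Q_LOCKED_MASK and _Q_PENDING_MASK definitions; it is not part of the kernel.

#include <linux/types.h>
#include <asm-generic/qspinlock_types.h>

/* Name the lock-word snapshots the removed wait loop had to tell apart. */
static const char *qspinlock_state_sketch(u32 val)
{
	if (!val)
		return "(0,0,0) unlocked";
	if (val & _Q_LOCKED_MASK)
		return "(*,*,1) locked byte visible";
	if (val & _Q_PENDING_MASK)
		return "(*,1,0) pending set, locked byte store still in flight";
	return "(n,0,0) queued, locked byte store still in flight";
}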