Diffstat (limited to 'kernel/locking/qspinlock.c')
-rw-r--r--	kernel/locking/qspinlock.c | 117
1 file changed, 0 insertions, 117 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index fd24153e8a48..294294c71ba4 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -268,123 +268,6 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
-/*
- * Various notes on spin_is_locked() and spin_unlock_wait(), which are
- * 'interesting' functions:
- *
- * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
- * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
- * PPC). Also qspinlock has a similar issue per construction, the setting of
- * the locked byte can be unordered acquiring the lock proper.
- *
- * This gets to be 'interesting' in the following cases, where the /should/s
- * end up false because of this issue.
- *
- *
- * CASE 1:
- *
- * So the spin_is_locked() correctness issue comes from something like:
- *
- *   CPU0				CPU1
- *
- *   global_lock();			local_lock(i)
- *     spin_lock(&G)			  spin_lock(&L[i])
- *     for (i)				  if (!spin_is_locked(&G)) {
- *       spin_unlock_wait(&L[i]);	    smp_acquire__after_ctrl_dep();
- *					    return;
- *					  }
- *					  // deal with fail
- *
- * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such
- * that there is exclusion between the two critical sections.
- *
- * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
- * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
- * /should/ be constrained by the ACQUIRE from spin_lock(&G).
- *
- * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
- *
- *
- * CASE 2:
- *
- * For spin_unlock_wait() there is a second correctness issue, namely:
- *
- *   CPU0				CPU1
- *
- *   flag = set;
- *   smp_mb();				spin_lock(&l)
- *   spin_unlock_wait(&l);		if (!flag)
- *					  // add to lockless list
- *					spin_unlock(&l);
- *   // iterate lockless list
- *
- * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
- * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
- * semantics etc..)
- *
- * Where flag /should/ be ordered against the locked store of l.
- */
-
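For illustration, the CASE 1 idiom described in the removed comment could look roughly like the minimal sketch below. It is hypothetical code, not part of this patch: G, L[] and the two helper names come from the comment, the since-removed spin_unlock_wait() primitive is assumed to still exist, and NR_LOCAL plus the declarations are invented here for completeness (spin_lock_init() of L[] is omitted).

	#include <linux/spinlock.h>

	#define NR_LOCAL	16			/* illustrative size */

	static DEFINE_SPINLOCK(G);			/* the global lock */
	static spinlock_t L[NR_LOCAL];			/* the per-instance locks */

	static void global_lock(void)
	{
		int i;

		spin_lock(&G);
		/* wait until no local-lock critical section is still in flight */
		for (i = 0; i < NR_LOCAL; i++)
			spin_unlock_wait(&L[i]);
	}

	static bool local_lock(int i)
	{
		spin_lock(&L[i]);
		if (!spin_is_locked(&G)) {
			/* order the critical section after the load of G */
			smp_acquire__after_ctrl_dep();
			return true;
		}
		/* deal with fail: drop L[i], caller falls back to taking G */
		spin_unlock(&L[i]);
		return false;
	}

Exclusion between the two critical sections relies exactly on the ordering discussed above: either local_lock() observes G locked, or global_lock() observes L[i] locked.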
-/*
- * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
- * issuing an _unordered_ store to set _Q_LOCKED_VAL.
- *
- * This means that the store can be delayed, but no later than the
- * store-release from the unlock. This means that simply observing
- * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
- *
- * There are two paths that can issue the unordered store:
- *
- *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
- *
- *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
- *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
- *
- * However, in both cases we have other !0 state we've set before to queue
- * ourselves:
- *
- * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
- * load is constrained by that ACQUIRE to not pass before that, and thus must
- * observe the store.
- *
- * For (2) we have a more interesting scenario. We enqueue ourselves using
- * xchg_tail(), which ends up being a RELEASE. This in itself is not
- * sufficient, however that is followed by an smp_cond_acquire() on the same
- * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
- * guarantees we must observe that store.
- *
- * Therefore both cases have other !0 state that is observable before the
- * unordered locked byte store comes through. This means we can use that to
- * wait for the lock store, and then wait for an unlock.
- */
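To make the (tail, pending, locked) notation above concrete, a hypothetical helper could decode a lock word into that triple. q_decode() below is illustrative only and not part of the patched file; the _Q_* masks and offsets, however, are the real field definitions from include/asm-generic/qspinlock_types.h, which qspinlock.c already pulls in.

	/* Hypothetical helper, for illustration only. */
	static inline void q_decode(u32 val, u32 *tail, u32 *pending, u32 *locked)
	{
		*tail    = (val & _Q_TAIL_MASK) >> _Q_TAIL_OFFSET;
		*pending = (val & _Q_PENDING_MASK) >> _Q_PENDING_OFFSET;
		*locked  = val & _Q_LOCKED_MASK;
	}

Read this way, path (1) is the pending bit being traded for the locked byte (*,1,0 -> *,0,1), and path (2) is a queued waiter with a non-zero tail taking the locked byte (t,0,0 -> t,0,1). In both cases the lock word is already non-zero before the unordered locked-byte store becomes visible, which is what the queued_spin_unlock_wait() implementation below relies on.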
-#ifndef queued_spin_unlock_wait
-void queued_spin_unlock_wait(struct qspinlock *lock)
-{
-	u32 val;
-
-	for (;;) {
-		val = atomic_read(&lock->val);
-
-		if (!val) /* not locked, we're done */
-			goto done;
-
-		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
-			break;
-
-		/* not locked, but pending, wait until we observe the lock */
-		cpu_relax();
-	}
-
-	/* any unlock is good */
-	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
-		cpu_relax();
-
-done:
-	smp_acquire__after_ctrl_dep();
-}
-EXPORT_SYMBOL(queued_spin_unlock_wait);
-#endif
-
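With queued_spin_unlock_wait() removed, code that used spin_unlock_wait() to synchronise with the current lock holder generally has to be restructured. One common, if heavier, replacement is a full acquire/release pair, sketched below; wait_for_current_holder() is an illustrative name, not a kernel API, and this is only one possible conversion.

	/*
	 * Hypothetical replacement sketch: acquiring and immediately releasing
	 * the lock orders us after any critical section that was in progress,
	 * which is at least as strong as what spin_unlock_wait() provided.
	 */
	static void wait_for_current_holder(spinlock_t *lock)
	{
		spin_lock(lock);
		spin_unlock(lock);
	}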
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**