diff options
| author | Manfred Spraul <manfred@colorfullife.com> | 2017-02-27 17:28:18 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-27 21:43:46 -0500 |
| commit | 9de5ab8a2eeea9ae4b63b6f6353b415b93e020c0 (patch) | |
| tree | cf33b34f1edcbca39139176cfe1781c41590ca3f /ipc | |
| parent | 27d7be1801a4824ecccbc735593101d72c038f13 (diff) | |
ipc/sem: add hysteresis
sysv sem has two lock modes: One with per-semaphore locks, one lock mode
with a single global lock for the whole array. When switching from the
per-semaphore locks to the global lock, all per-semaphore locks must be
scanned for ongoing operations.
The patch adds a hysteresis for switching from the global lock to the
per-semaphore locks. This reduces how often the per-semaphore locks
must be scanned.
Compared to the initial patch, this is a simplified solution: Setting
USE_GLOBAL_LOCK_HYSTERESIS to 1 restores the current behavior.
In theory, a workload with exactly 10 simple sops and then one complex
op now scales a bit worse, but this is pure theory: If there is
concurrency, then it won't be exactly 10:1:10:1:10:1:... If there is no
concurrency, then there is no need for scalability.
Link: http://lkml.kernel.org/r/1476851896-3590-3-git-send-email-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: <1vier1@web.de>
Cc: kernel test robot <xiaolong.ye@intel.com>
Cc: <felixh@informatik.uni-bremen.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
| -rw-r--r-- | ipc/sem.c | 86 |
1 files changed, 61 insertions, 25 deletions
| @@ -159,22 +159,42 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); | |||
| 159 | #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ | 159 | #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ |
| 160 | 160 | ||
| 161 | /* | 161 | /* |
| 162 | * Switching from the mode suitable for simple ops | ||
| 163 | * to the mode for complex ops is costly. Therefore: | ||
| 164 | * use some hysteresis | ||
| 165 | */ | ||
| 166 | #define USE_GLOBAL_LOCK_HYSTERESIS 10 | ||
| 167 | |||
| 168 | /* | ||
| 162 | * Locking: | 169 | * Locking: |
| 163 | * a) global sem_lock() for read/write | 170 | * a) global sem_lock() for read/write |
| 164 | * sem_undo.id_next, | 171 | * sem_undo.id_next, |
| 165 | * sem_array.complex_count, | 172 | * sem_array.complex_count, |
| 166 | * sem_array.complex_mode | ||
| 167 | * sem_array.pending{_alter,_const}, | 173 | * sem_array.pending{_alter,_const}, |
| 168 | * sem_array.sem_undo | 174 | * sem_array.sem_undo |
| 169 | * | 175 | * |
| 170 | * b) global or semaphore sem_lock() for read/write: | 176 | * b) global or semaphore sem_lock() for read/write: |
| 171 | * sem_array.sem_base[i].pending_{const,alter}: | 177 | * sem_array.sem_base[i].pending_{const,alter}: |
| 172 | * sem_array.complex_mode (for read) | ||
| 173 | * | 178 | * |
| 174 | * c) special: | 179 | * c) special: |
| 175 | * sem_undo_list.list_proc: | 180 | * sem_undo_list.list_proc: |
| 176 | * * undo_list->lock for write | 181 | * * undo_list->lock for write |
| 177 | * * rcu for read | 182 | * * rcu for read |
| 183 | * use_global_lock: | ||
| 184 | * * global sem_lock() for write | ||
| 185 | * * either local or global sem_lock() for read. | ||
| 186 | * | ||
| 187 | * Memory ordering: | ||
| 188 | * Most ordering is enforced by using spin_lock() and spin_unlock(). | ||
| 189 | * The special case is use_global_lock: | ||
| 190 | * Setting it from non-zero to 0 is a RELEASE, this is ensured by | ||
| 191 | * using smp_store_release(). | ||
| 192 | * Testing if it is non-zero is an ACQUIRE, this is ensured by using | ||
| 193 | * smp_load_acquire(). | ||
| 194 | * Setting it from 0 to non-zero must be ordered with regards to | ||
| 195 | * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() | ||
| 196 | * is inside a spin_lock() and after a write from 0 to non-zero a | ||
| 197 | * spin_lock()+spin_unlock() is done. | ||
| 178 | */ | 198 | */ |
| 179 | 199 | ||
| 180 | #define sc_semmsl sem_ctls[0] | 200 | #define sc_semmsl sem_ctls[0] |
| @@ -273,12 +293,16 @@ static void complexmode_enter(struct sem_array *sma) | |||
| 273 | int i; | 293 | int i; |
| 274 | struct sem *sem; | 294 | struct sem *sem; |
| 275 | 295 | ||
| 276 | if (sma->complex_mode) { | 296 | if (sma->use_global_lock > 0) { |
| 277 | /* We are already in complex_mode. Nothing to do */ | 297 | /* |
| 298 | * We are already in global lock mode. | ||
| 299 | * Nothing to do, just reset the | ||
| 300 | * counter until we return to simple mode. | ||
| 301 | */ | ||
| 302 | sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; | ||
| 278 | return; | 303 | return; |
| 279 | } | 304 | } |
| 280 | 305 | sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; | |
| 281 | sma->complex_mode = true; | ||
| 282 | 306 | ||
| 283 | for (i = 0; i < sma->sem_nsems; i++) { | 307 | for (i = 0; i < sma->sem_nsems; i++) { |
| 284 | sem = sma->sem_base + i; | 308 | sem = sma->sem_base + i; |
| @@ -299,13 +323,17 @@ static void complexmode_tryleave(struct sem_array *sma) | |||
| 299 | */ | 323 | */ |
| 300 | return; | 324 | return; |
| 301 | } | 325 | } |
| 302 | /* | 326 | if (sma->use_global_lock == 1) { |
| 303 | * Immediately after setting complex_mode to false, | 327 | /* |
| 304 | * a simple op can start. Thus: all memory writes | 328 | * Immediately after setting use_global_lock to 0, |
| 305 | * performed by the current operation must be visible | 329 | * a simple op can start. Thus: all memory writes |
| 306 | * before we set complex_mode to false. | 330 | * performed by the current operation must be visible |
| 307 | */ | 331 | * before we set use_global_lock to 0. |
| 308 | smp_store_release(&sma->complex_mode, false); | 332 | */ |
| 333 | smp_store_release(&sma->use_global_lock, 0); | ||
| 334 | } else { | ||
| 335 | sma->use_global_lock--; | ||
| 336 | } | ||
| 309 | } | 337 | } |
| 310 | 338 | ||
| 311 | #define SEM_GLOBAL_LOCK (-1) | 339 | #define SEM_GLOBAL_LOCK (-1) |
| @@ -335,22 +363,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, | |||
| 335 | * Optimized locking is possible if no complex operation | 363 | * Optimized locking is possible if no complex operation |
| 336 | * is either enqueued or processed right now. | 364 | * is either enqueued or processed right now. |
| 337 | * | 365 | * |
| 338 | * Both facts are tracked by complex_mode. | 366 | * Both facts are tracked by use_global_mode. |
| 339 | */ | 367 | */ |
| 340 | sem = sma->sem_base + sops->sem_num; | 368 | sem = sma->sem_base + sops->sem_num; |
| 341 | 369 | ||
| 342 | /* | 370 | /* |
| 343 | * Initial check for complex_mode. Just an optimization, | 371 | * Initial check for use_global_lock. Just an optimization, |
| 344 | * no locking, no memory barrier. | 372 | * no locking, no memory barrier. |
| 345 | */ | 373 | */ |
| 346 | if (!sma->complex_mode) { | 374 | if (!sma->use_global_lock) { |
| 347 | /* | 375 | /* |
| 348 | * It appears that no complex operation is around. | 376 | * It appears that no complex operation is around. |
| 349 | * Acquire the per-semaphore lock. | 377 | * Acquire the per-semaphore lock. |
| 350 | */ | 378 | */ |
| 351 | spin_lock(&sem->lock); | 379 | spin_lock(&sem->lock); |
| 352 | 380 | ||
| 353 | if (!smp_load_acquire(&sma->complex_mode)) { | 381 | /* pairs with smp_store_release() */ |
| 382 | if (!smp_load_acquire(&sma->use_global_lock)) { | ||
| 354 | /* fast path successful! */ | 383 | /* fast path successful! */ |
| 355 | return sops->sem_num; | 384 | return sops->sem_num; |
| 356 | } | 385 | } |
| @@ -360,19 +389,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, | |||
| 360 | /* slow path: acquire the full lock */ | 389 | /* slow path: acquire the full lock */ |
| 361 | ipc_lock_object(&sma->sem_perm); | 390 | ipc_lock_object(&sma->sem_perm); |
| 362 | 391 | ||
| 363 | if (sma->complex_count == 0) { | 392 | if (sma->use_global_lock == 0) { |
| 364 | /* False alarm: | 393 | /* |
| 365 | * There is no complex operation, thus we can switch | 394 | * The use_global_lock mode ended while we waited for |
| 366 | * back to the fast path. | 395 | * sma->sem_perm.lock. Thus we must switch to locking |
| 396 | * with sem->lock. | ||
| 397 | * Unlike in the fast path, there is no need to recheck | ||
| 398 | * sma->use_global_lock after we have acquired sem->lock: | ||
| 399 | * We own sma->sem_perm.lock, thus use_global_lock cannot | ||
| 400 | * change. | ||
| 367 | */ | 401 | */ |
| 368 | spin_lock(&sem->lock); | 402 | spin_lock(&sem->lock); |
| 403 | |||
| 369 | ipc_unlock_object(&sma->sem_perm); | 404 | ipc_unlock_object(&sma->sem_perm); |
| 370 | return sops->sem_num; | 405 | return sops->sem_num; |
| 371 | } else { | 406 | } else { |
| 372 | /* Not a false alarm, thus complete the sequence for a | 407 | /* |
| 373 | * full lock. | 408 | * Not a false alarm, thus continue to use the global lock |
| 409 | * mode. No need for complexmode_enter(), this was done by | ||
| 410 | * the caller that has set use_global_mode to non-zero. | ||
| 374 | */ | 411 | */ |
| 375 | complexmode_enter(sma); | ||
| 376 | return SEM_GLOBAL_LOCK; | 412 | return SEM_GLOBAL_LOCK; |
| 377 | } | 413 | } |
| 378 | } | 414 | } |
| @@ -476,7 +512,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) | |||
| 476 | } | 512 | } |
| 477 | 513 | ||
| 478 | sma->complex_count = 0; | 514 | sma->complex_count = 0; |
| 479 | sma->complex_mode = true; /* dropped by sem_unlock below */ | 515 | sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; |
| 480 | INIT_LIST_HEAD(&sma->pending_alter); | 516 | INIT_LIST_HEAD(&sma->pending_alter); |
| 481 | INIT_LIST_HEAD(&sma->pending_const); | 517 | INIT_LIST_HEAD(&sma->pending_const); |
| 482 | INIT_LIST_HEAD(&sma->list_id); | 518 | INIT_LIST_HEAD(&sma->list_id); |
