aboutsummaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
authorManfred Spraul <manfred@colorfullife.com>2016-10-11 16:54:50 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-10-11 18:06:33 -0400
commit5864a2fd3088db73d47942370d0f7210a807b9bc (patch)
treef985a10bc1459348f13e77820ca8a3b60296d2b5 /ipc
parent65deb8af76defeae4b114a75242ed15b0bcba173 (diff)
ipc/sem.c: fix complex_count vs. simple op race
Commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") introduced a race: sem_lock has a fast path that allows parallel simple operations. There are two reasons why a simple operation cannot run in parallel: - a non-simple operations is ongoing (sma->sem_perm.lock held) - a complex operation is sleeping (sma->complex_count != 0) As both facts are stored independently, a thread can bypass the current checks by sleeping in the right positions. See below for more details (or kernel bugzilla 105651). The patch fixes that by creating one variable (complex_mode) that tracks both reasons why parallel operations are not possible. The patch also updates stale documentation regarding the locking. With regards to stable kernels: The patch is required for all kernels that include the commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") (3.10?) The alternative is to revert the patch that introduced the race. The patch is safe for backporting, i.e. it makes no assumptions about memory barriers in spin_unlock_wait(). Background: Here is the race of the current implementation: Thread A: (simple op) - does the first "sma->complex_count == 0" test Thread B: (complex op) - does sem_lock(): This includes an array scan. But the scan can't find Thread A, because Thread A does not own sem->lock yet. - the thread does the operation, increases complex_count, drops sem_lock, sleeps Thread A: - spin_lock(&sem->lock), spin_is_locked(sma->sem_perm.lock) - sleeps before the complex_count test Thread C: (complex op) - does sem_lock (no array scan, complex_count==1) - wakes up Thread B. - decrements complex_count Thread A: - does the complex_count test Bug: Now both thread A and thread C operate on the same array, without any synchronization. Fixes: 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") Link: http://lkml.kernel.org/r/1469123695-5661-1-git-send-email-manfred@colorfullife.com Reported-by: <felixh@informatik.uni-bremen.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: <1vier1@web.de> Cc: <stable@vger.kernel.org> [3.10+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
-rw-r--r--ipc/sem.c138
1 files changed, 83 insertions, 55 deletions
diff --git a/ipc/sem.c b/ipc/sem.c
index 7c9d4f7683c0..5e318c5f749d 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -162,14 +162,21 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
162 162
163/* 163/*
164 * Locking: 164 * Locking:
165 * a) global sem_lock() for read/write
165 * sem_undo.id_next, 166 * sem_undo.id_next,
166 * sem_array.complex_count, 167 * sem_array.complex_count,
167 * sem_array.pending{_alter,_cont}, 168 * sem_array.complex_mode
168 * sem_array.sem_undo: global sem_lock() for read/write 169 * sem_array.pending{_alter,_const},
169 * sem_undo.proc_next: only "current" is allowed to read/write that field. 170 * sem_array.sem_undo
170 * 171 *
172 * b) global or semaphore sem_lock() for read/write:
171 * sem_array.sem_base[i].pending_{const,alter}: 173 * sem_array.sem_base[i].pending_{const,alter}:
172 * global or semaphore sem_lock() for read/write 174 * sem_array.complex_mode (for read)
175 *
176 * c) special:
177 * sem_undo_list.list_proc:
178 * * undo_list->lock for write
179 * * rcu for read
173 */ 180 */
174 181
175#define sc_semmsl sem_ctls[0] 182#define sc_semmsl sem_ctls[0]
@@ -260,30 +267,61 @@ static void sem_rcu_free(struct rcu_head *head)
260} 267}
261 268
262/* 269/*
263 * Wait until all currently ongoing simple ops have completed. 270 * Enter the mode suitable for non-simple operations:
264 * Caller must own sem_perm.lock. 271 * Caller must own sem_perm.lock.
265 * New simple ops cannot start, because simple ops first check
266 * that sem_perm.lock is free.
267 * that a) sem_perm.lock is free and b) complex_count is 0.
268 */ 272 */
269static void sem_wait_array(struct sem_array *sma) 273static void complexmode_enter(struct sem_array *sma)
270{ 274{
271 int i; 275 int i;
272 struct sem *sem; 276 struct sem *sem;
273 277
274 if (sma->complex_count) { 278 if (sma->complex_mode) {
275 /* The thread that increased sma->complex_count waited on 279 /* We are already in complex_mode. Nothing to do */
276 * all sem->lock locks. Thus we don't need to wait again.
277 */
278 return; 280 return;
279 } 281 }
280 282
283 /* We need a full barrier after seting complex_mode:
284 * The write to complex_mode must be visible
285 * before we read the first sem->lock spinlock state.
286 */
287 smp_store_mb(sma->complex_mode, true);
288
281 for (i = 0; i < sma->sem_nsems; i++) { 289 for (i = 0; i < sma->sem_nsems; i++) {
282 sem = sma->sem_base + i; 290 sem = sma->sem_base + i;
283 spin_unlock_wait(&sem->lock); 291 spin_unlock_wait(&sem->lock);
284 } 292 }
293 /*
294 * spin_unlock_wait() is not a memory barriers, it is only a
295 * control barrier. The code must pair with spin_unlock(&sem->lock),
296 * thus just the control barrier is insufficient.
297 *
298 * smp_rmb() is sufficient, as writes cannot pass the control barrier.
299 */
300 smp_rmb();
301}
302
303/*
304 * Try to leave the mode that disallows simple operations:
305 * Caller must own sem_perm.lock.
306 */
307static void complexmode_tryleave(struct sem_array *sma)
308{
309 if (sma->complex_count) {
310 /* Complex ops are sleeping.
311 * We must stay in complex mode
312 */
313 return;
314 }
315 /*
316 * Immediately after setting complex_mode to false,
317 * a simple op can start. Thus: all memory writes
318 * performed by the current operation must be visible
319 * before we set complex_mode to false.
320 */
321 smp_store_release(&sma->complex_mode, false);
285} 322}
286 323
324#define SEM_GLOBAL_LOCK (-1)
287/* 325/*
288 * If the request contains only one semaphore operation, and there are 326 * If the request contains only one semaphore operation, and there are
289 * no complex transactions pending, lock only the semaphore involved. 327 * no complex transactions pending, lock only the semaphore involved.
@@ -300,56 +338,42 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
300 /* Complex operation - acquire a full lock */ 338 /* Complex operation - acquire a full lock */
301 ipc_lock_object(&sma->sem_perm); 339 ipc_lock_object(&sma->sem_perm);
302 340
303 /* And wait until all simple ops that are processed 341 /* Prevent parallel simple ops */
304 * right now have dropped their locks. 342 complexmode_enter(sma);
305 */ 343 return SEM_GLOBAL_LOCK;
306 sem_wait_array(sma);
307 return -1;
308 } 344 }
309 345
310 /* 346 /*
311 * Only one semaphore affected - try to optimize locking. 347 * Only one semaphore affected - try to optimize locking.
312 * The rules are: 348 * Optimized locking is possible if no complex operation
313 * - optimized locking is possible if no complex operation 349 * is either enqueued or processed right now.
314 * is either enqueued or processed right now. 350 *
315 * - The test for enqueued complex ops is simple: 351 * Both facts are tracked by complex_mode.
316 * sma->complex_count != 0
317 * - Testing for complex ops that are processed right now is
318 * a bit more difficult. Complex ops acquire the full lock
319 * and first wait that the running simple ops have completed.
320 * (see above)
321 * Thus: If we own a simple lock and the global lock is free
322 * and complex_count is now 0, then it will stay 0 and
323 * thus just locking sem->lock is sufficient.
324 */ 352 */
325 sem = sma->sem_base + sops->sem_num; 353 sem = sma->sem_base + sops->sem_num;
326 354
327 if (sma->complex_count == 0) { 355 /*
356 * Initial check for complex_mode. Just an optimization,
357 * no locking, no memory barrier.
358 */
359 if (!sma->complex_mode) {
328 /* 360 /*
329 * It appears that no complex operation is around. 361 * It appears that no complex operation is around.
330 * Acquire the per-semaphore lock. 362 * Acquire the per-semaphore lock.
331 */ 363 */
332 spin_lock(&sem->lock); 364 spin_lock(&sem->lock);
333 365
334 /* Then check that the global lock is free */ 366 /*
335 if (!spin_is_locked(&sma->sem_perm.lock)) { 367 * See 51d7d5205d33
336 /* 368 * ("powerpc: Add smp_mb() to arch_spin_is_locked()"):
337 * We need a memory barrier with acquire semantics, 369 * A full barrier is required: the write of sem->lock
338 * otherwise we can race with another thread that does: 370 * must be visible before the read is executed
339 * complex_count++; 371 */
340 * spin_unlock(sem_perm.lock); 372 smp_mb();
341 */
342 smp_acquire__after_ctrl_dep();
343 373
344 /* 374 if (!smp_load_acquire(&sma->complex_mode)) {
345 * Now repeat the test of complex_count: 375 /* fast path successful! */
346 * It can't change anymore until we drop sem->lock. 376 return sops->sem_num;
347 * Thus: if is now 0, then it will stay 0.
348 */
349 if (sma->complex_count == 0) {
350 /* fast path successful! */
351 return sops->sem_num;
352 }
353 } 377 }
354 spin_unlock(&sem->lock); 378 spin_unlock(&sem->lock);
355 } 379 }
@@ -369,15 +393,16 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
369 /* Not a false alarm, thus complete the sequence for a 393 /* Not a false alarm, thus complete the sequence for a
370 * full lock. 394 * full lock.
371 */ 395 */
372 sem_wait_array(sma); 396 complexmode_enter(sma);
373 return -1; 397 return SEM_GLOBAL_LOCK;
374 } 398 }
375} 399}
376 400
377static inline void sem_unlock(struct sem_array *sma, int locknum) 401static inline void sem_unlock(struct sem_array *sma, int locknum)
378{ 402{
379 if (locknum == -1) { 403 if (locknum == SEM_GLOBAL_LOCK) {
380 unmerge_queues(sma); 404 unmerge_queues(sma);
405 complexmode_tryleave(sma);
381 ipc_unlock_object(&sma->sem_perm); 406 ipc_unlock_object(&sma->sem_perm);
382 } else { 407 } else {
383 struct sem *sem = sma->sem_base + locknum; 408 struct sem *sem = sma->sem_base + locknum;
@@ -529,6 +554,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
529 } 554 }
530 555
531 sma->complex_count = 0; 556 sma->complex_count = 0;
557 sma->complex_mode = true; /* dropped by sem_unlock below */
532 INIT_LIST_HEAD(&sma->pending_alter); 558 INIT_LIST_HEAD(&sma->pending_alter);
533 INIT_LIST_HEAD(&sma->pending_const); 559 INIT_LIST_HEAD(&sma->pending_const);
534 INIT_LIST_HEAD(&sma->list_id); 560 INIT_LIST_HEAD(&sma->list_id);
@@ -2184,10 +2210,10 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2184 /* 2210 /*
2185 * The proc interface isn't aware of sem_lock(), it calls 2211 * The proc interface isn't aware of sem_lock(), it calls
2186 * ipc_lock_object() directly (in sysvipc_find_ipc). 2212 * ipc_lock_object() directly (in sysvipc_find_ipc).
2187 * In order to stay compatible with sem_lock(), we must wait until 2213 * In order to stay compatible with sem_lock(), we must
2188 * all simple semop() calls have left their critical regions. 2214 * enter / leave complex_mode.
2189 */ 2215 */
2190 sem_wait_array(sma); 2216 complexmode_enter(sma);
2191 2217
2192 sem_otime = get_semotime(sma); 2218 sem_otime = get_semotime(sma);
2193 2219
@@ -2204,6 +2230,8 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2204 sem_otime, 2230 sem_otime,
2205 sma->sem_ctime); 2231 sma->sem_ctime);
2206 2232
2233 complexmode_tryleave(sma);
2234
2207 return 0; 2235 return 0;
2208} 2236}
2209#endif 2237#endif