ipc/sem.c: fix race in sem_lock()

The exclusion of complex operations in sem_lock() is insufficient: after acquiring the per-semaphore lock, a simple op must first check that sem_perm.lock is not locked and only after that test check complex_count. The current code does it the other way around - and that creates a race. Details are below. The patch is a complete rewrite of sem_lock(), based in part on the code from Mike Galbraith. It removes all gotos and all loops and thus the risk of livelocks. I have tested the patch (together with the next one) on my i3 laptop and it didn't cause any problems. The bug is probably also present in 3.10 and 3.11, but for these kernels it might be simpler just to move the test of sma->complex_count after the spin_is_locked() test. Details of the bug: Assume: - sma->complex_count = 0. - Thread 1: semtimedop(complex op that must sleep) - Thread 2: semtimedop(simple op). Pseudo-Trace: Thread 1: sem_lock(): acquire sem_perm.lock Thread 1: sem_lock(): check for ongoing simple ops Nothing ongoing, thread 2 is still before sem_lock(). Thread 1: try_atomic_semop() <<< preempted. Thread 2: sem_lock(): static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, int nsops) { int locknum; again: if (nsops == 1 && !sma->complex_count) { struct sem *sem = sma->sem_base + sops->sem_num; /* Lock just the semaphore we are interested in. */ spin_lock(&sem->lock); /* * If sma->complex_count was set while we were spinning, * we may need to look at things we did not lock here. */ if (unlikely(sma->complex_count)) { spin_unlock(&sem->lock); goto lock_array; } <<<<<<<<< <<< complex_count is still 0. <<< <<< Here it is preempted <<<<<<<<< Thread 1: try_atomic_semop() returns, notices that it must sleep. Thread 1: increases sma->complex_count. Thread 1: drops sem_perm.lock Thread 2: /* * Another process is holding the global lock on the * sem_array; we cannot enter our critical section, * but have to wait for the global lock to be released. */ if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { spin_unlock(&sem->lock); spin_unlock_wait(&sma->sem_perm.lock); goto again; } <<< sem_perm.lock already dropped, thus no "goto again;" locknum = sops->sem_num; Signed-off-by: Manfred Spraul <manfred@colorfullife.com> Cc: Mike Galbraith <bitbucket@online.de> Cc: Rik van Riel <riel@redhat.com> Cc: Davidlohr Bueso <davidlohr.bueso@hp.com> Cc: <stable@vger.kernel.org> [3.10+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Manfred Spraul <manfred@colorfullife.com> 2013-09-30 16:45:04 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-09-30 17:31:01 -0400
commit: 5e9d527591421ccdb16acb8c23662231135d8686 (patch)
tree: dbe055ecd0bb6e497dceab2c9c0136151d04063a /ipc/sem.c
parent: f6ea3adb70b20ae36277a1b0eaaf4da9f6479a28 (diff)
1 files changed, 78 insertions, 44 deletions
diff --git a/ipc/sem.c b/ipc/sem.c
index 19c8b980d1fe..4a92c0447ad6 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -253,70 +253,104 @@ static void sem_rcu_free(struct rcu_head *head)
 }
 /*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+        int i;
+        struct sem *sem;
+        for (i = 0; i < sma->sem_nsems; i++) {
+                sem = sma->sem_base + i;
+                spin_unlock_wait(&sem->lock);
+        }
+}
+/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
 */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                              int nsops)
 {
-        int locknum;
+        struct sem *sem;
- again:
-        if (nsops == 1 && !sma->complex_count) {
-                struct sem *sem = sma->sem_base + sops->sem_num;
-                /* Lock just the semaphore we are interested in. */
+        if (nsops != 1) {
-                spin_lock(&sem->lock);
+                /* Complex operation - acquire a full lock */
+                ipc_lock_object(&sma->sem_perm);
-                /*
+                /* And wait until all simple ops that are processed
-                 * If sma->complex_count was set while we were spinning,
+                 * right now have dropped their locks.
-                 * we may need to look at things we did not lock here.
                 */
-                if (unlikely(sma->complex_count)) {
+                sem_wait_array(sma);
-                        spin_unlock(&sem->lock);
+                return -1;
-                        goto lock_array;
+        }
-                }
+        /*
+         * Only one semaphore affected - try to optimize locking.
+         * The rules are:
+         * - optimized locking is possible if no complex operation
+         *   is either enqueued or processed right now.
+         * - The test for enqueued complex ops is simple:
+         *      sma->complex_count != 0
+         * - Testing for complex ops that are processed right now is
+         *   a bit more difficult. Complex ops acquire the full lock
+         *   and first wait that the running simple ops have completed.
+         *   (see above)
+         *   Thus: If we own a simple lock and the global lock is free
+         *      and complex_count is now 0, then it will stay 0 and
+         *      thus just locking sem->lock is sufficient.
+         */
+        sem = sma->sem_base + sops->sem_num;
+        if (sma->complex_count == 0) {
                /*
-                 * Another process is holding the global lock on the
+                 * It appears that no complex operation is around.
-                 * sem_array; we cannot enter our critical section,
+                 * Acquire the per-semaphore lock.
-                 * but have to wait for the global lock to be released.
                 */
-                if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+                spin_lock(&sem->lock);
-                        spin_unlock(&sem->lock);
-                        spin_unlock_wait(&sma->sem_perm.lock);
+                /* Then check that the global lock is free */
-                        goto again;
+                if (!spin_is_locked(&sma->sem_perm.lock)) {
+                        /* spin_is_locked() is not a memory barrier */
+                        smp_mb();
+                        /* Now repeat the test of complex_count:
+                         * It can't change anymore until we drop sem->lock.
+                         * Thus: if is now 0, then it will stay 0.
+                         */
+                        if (sma->complex_count == 0) {
+                                /* fast path successful! */
+                                return sops->sem_num;
+                        }
                }
+                spin_unlock(&sem->lock);
+        }
-                locknum = sops->sem_num;
+        /* slow path: acquire the full lock */
+        ipc_lock_object(&sma->sem_perm);
+        if (sma->complex_count == 0) {
+                /* False alarm:
+                 * There is no complex operation, thus we can switch
+                 * back to the fast path.
+                 */
+                spin_lock(&sem->lock);
+                ipc_unlock_object(&sma->sem_perm);
+                return sops->sem_num;
        } else {
-                int i;
+                /* Not a false alarm, thus complete the sequence for a
-                /*
+                 * full lock.
-                 * Lock the semaphore array, and wait for all of the
-                 * individual semaphore locks to go away.  The code
-                 * above ensures no new single-lock holders will enter
-                 * their critical section while the array lock is held.
                 */
- lock_array:
+                sem_wait_array(sma);
-                ipc_lock_object(&sma->sem_perm);
+                return -1;
-                for (i = 0; i < sma->sem_nsems; i++) {
-                        struct sem *sem = sma->sem_base + i;
-                        spin_unlock_wait(&sem->lock);
-                }
-                locknum = -1;
        }
-        return locknum;
 }
 static inline void sem_unlock(struct sem_array *sma, int locknum)
author	Manfred Spraul <manfred@colorfullife.com>	2013-09-30 16:45:04 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-30 17:31:01 -0400
commit	5e9d527591421ccdb16acb8c23662231135d8686 (patch)
tree	dbe055ecd0bb6e497dceab2c9c0136151d04063a /ipc/sem.c
parent	f6ea3adb70b20ae36277a1b0eaaf4da9f6479a28 (diff)

diff --git a/ipc/sem.c b/ipc/sem.c index 19c8b980d1fe..4a92c0447ad6 100644 --- a/ipc/sem.c +++ b/ipc/sem.c
@@ -253,70 +253,104 @@ static void sem_rcu_free(struct rcu_head *head)
253	}	253	}
254		254
255	/*	255	/*
		256	* Wait until all currently ongoing simple ops have completed.
		257	* Caller must own sem_perm.lock.
		258	* New simple ops cannot start, because simple ops first check
		259	* that sem_perm.lock is free.
		260	*/
		261	static void sem_wait_array(struct sem_array *sma)
		262	{
		263	int i;
		264	struct sem *sem;
		265
		266	for (i = 0; i < sma->sem_nsems; i++) {
		267	sem = sma->sem_base + i;
		268	spin_unlock_wait(&sem->lock);
		269	}
		270	}
		271
		272	/*
256	* If the request contains only one semaphore operation, and there are	273	* If the request contains only one semaphore operation, and there are
257	* no complex transactions pending, lock only the semaphore involved.	274	* no complex transactions pending, lock only the semaphore involved.
258	* Otherwise, lock the entire semaphore array, since we either have	275	* Otherwise, lock the entire semaphore array, since we either have
259	* multiple semaphores in our own semops, or we need to look at	276	* multiple semaphores in our own semops, or we need to look at
260	* semaphores from other pending complex operations.	277	* semaphores from other pending complex operations.
261	*
262	* Carefully guard against sma->complex_count changing between zero
263	* and non-zero while we are spinning for the lock. The value of
264	* sma->complex_count cannot change while we are holding the lock,
265	* so sem_unlock should be fine.
266	*
267	* The global lock path checks that all the local locks have been released,
268	* checking each local lock once. This means that the local lock paths
269	* cannot start their critical sections while the global lock is held.
270	*/	278	*/
271	static inline int sem_lock(struct sem_array sma, struct sembuf sops,	279	static inline int sem_lock(struct sem_array sma, struct sembuf sops,
272	int nsops)	280	int nsops)
273	{	281	{
274	int locknum;	282	struct sem *sem;
275	again:
276	if (nsops == 1 && !sma->complex_count) {
277	struct sem *sem = sma->sem_base + sops->sem_num;
278		283
279	/* Lock just the semaphore we are interested in. */	284	if (nsops != 1) {
280	spin_lock(&sem->lock);	285	/* Complex operation - acquire a full lock */
		286	ipc_lock_object(&sma->sem_perm);
281		287
282	/*	288	/* And wait until all simple ops that are processed
283	* If sma->complex_count was set while we were spinning,	289	* right now have dropped their locks.
284	* we may need to look at things we did not lock here.
285	*/	290	*/
286	if (unlikely(sma->complex_count)) {	291	sem_wait_array(sma);
287	spin_unlock(&sem->lock);	292	return -1;
288	goto lock_array;	293	}
289	}	294
		295	/*
		296	* Only one semaphore affected - try to optimize locking.
		297	* The rules are:
		298	* - optimized locking is possible if no complex operation
		299	* is either enqueued or processed right now.
		300	* - The test for enqueued complex ops is simple:
		301	* sma->complex_count != 0
		302	* - Testing for complex ops that are processed right now is
		303	* a bit more difficult. Complex ops acquire the full lock
		304	* and first wait that the running simple ops have completed.
		305	* (see above)
		306	* Thus: If we own a simple lock and the global lock is free
		307	* and complex_count is now 0, then it will stay 0 and
		308	* thus just locking sem->lock is sufficient.
		309	*/
		310	sem = sma->sem_base + sops->sem_num;
290		311
		312	if (sma->complex_count == 0) {
291	/*	313	/*
292	* Another process is holding the global lock on the	314	* It appears that no complex operation is around.
293	* sem_array; we cannot enter our critical section,	315	* Acquire the per-semaphore lock.
294	* but have to wait for the global lock to be released.
295	*/	316	*/
296	if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {	317	spin_lock(&sem->lock);
297	spin_unlock(&sem->lock);	318
298	spin_unlock_wait(&sma->sem_perm.lock);	319	/* Then check that the global lock is free */
299	goto again;	320	if (!spin_is_locked(&sma->sem_perm.lock)) {
		321	/* spin_is_locked() is not a memory barrier */
		322	smp_mb();
		323
		324	/* Now repeat the test of complex_count:
		325	* It can't change anymore until we drop sem->lock.
		326	* Thus: if is now 0, then it will stay 0.
		327	*/
		328	if (sma->complex_count == 0) {
		329	/* fast path successful! */
		330	return sops->sem_num;
		331	}
300	}	332	}
		333	spin_unlock(&sem->lock);
		334	}
301		335
302	locknum = sops->sem_num;	336	/* slow path: acquire the full lock */
		337	ipc_lock_object(&sma->sem_perm);
		338
		339	if (sma->complex_count == 0) {
		340	/* False alarm:
		341	* There is no complex operation, thus we can switch
		342	* back to the fast path.
		343	*/
		344	spin_lock(&sem->lock);
		345	ipc_unlock_object(&sma->sem_perm);
		346	return sops->sem_num;
303	} else {	347	} else {
304	int i;	348	/* Not a false alarm, thus complete the sequence for a
305	/*	349	* full lock.
306	* Lock the semaphore array, and wait for all of the
307	* individual semaphore locks to go away. The code
308	* above ensures no new single-lock holders will enter
309	* their critical section while the array lock is held.
310	*/	350	*/
311	lock_array:	351	sem_wait_array(sma);
312	ipc_lock_object(&sma->sem_perm);	352	return -1;
313	for (i = 0; i < sma->sem_nsems; i++) {
314	struct sem *sem = sma->sem_base + i;
315	spin_unlock_wait(&sem->lock);
316	}
317	locknum = -1;
318	}	353	}
319	return locknum;
320	}	354	}
321		355
322	static inline void sem_unlock(struct sem_array *sma, int locknum)	356	static inline void sem_unlock(struct sem_array *sma, int locknum)