diff options
| author | Manfred Spraul <manfred@colorfullife.com> | 2017-02-27 17:28:18 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-27 21:43:46 -0500 |
| commit | 9de5ab8a2eeea9ae4b63b6f6353b415b93e020c0 (patch) | |
| tree | cf33b34f1edcbca39139176cfe1781c41590ca3f /ipc | |
| parent | 27d7be1801a4824ecccbc735593101d72c038f13 (diff) | |
ipc/sem: add hysteresis
sysv sem has two lock modes: One with per-semaphore locks, one lock mode
with a single global lock for the whole array. When switching from the
per-semaphore locks to the global lock, all per-semaphore locks must be
scanned for ongoing operations.
The patch adds a hysteresis for switching from the global lock to the
per-semaphore locks. This reduces how often the per-semaphore locks
must be scanned.
Compared to the initial patch, this is a simplified solution: Setting
USE_GLOBAL_LOCK_HYSTERESIS to 1 restores the current behavior.
In theory, a workload with exactly 10 simple sops and then one complex
op now scales a bit worse, but this is pure theory: If there is
concurrency, then it won't be exactly 10:1:10:1:10:1:... If there is no
concurrency, then there is no need for scalability.
Link: http://lkml.kernel.org/r/1476851896-3590-3-git-send-email-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: <1vier1@web.de>
Cc: kernel test robot <xiaolong.ye@intel.com>
Cc: <felixh@informatik.uni-bremen.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
| -rw-r--r-- | ipc/sem.c | 86 |
1 files changed, 61 insertions, 25 deletions
| @@ -159,22 +159,42 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); | |||
| 159 | #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ | 159 | #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ |
| 160 | 160 | ||
| 161 | /* | 161 | /* |
| 162 | * Switching from the mode suitable for simple ops | ||
| 163 | * to the mode for complex ops is costly. Therefore: | ||
| 164 | * use some hysteresis | ||
| 165 | */ | ||
| 166 | #define USE_GLOBAL_LOCK_HYSTERESIS 10 | ||
| 167 | |||
| 168 | /* | ||
| 162 | * Locking: | 169 | * Locking: |
| 163 | * a) global sem_lock() for read/write | 170 | * a) global sem_lock() for read/write |
| 164 | * sem_undo.id_next, | 171 | * sem_undo.id_next, |
| 165 | * sem_array.complex_count, | 172 | * sem_array.complex_count, |
| 166 | * sem_array.complex_mode | ||
| 167 | * sem_array.pending{_alter,_const}, | 173 | * sem_array.pending{_alter,_const}, |
| 168 | * sem_array.sem_undo | 174 | * sem_array.sem_undo |
| 169 | * | 175 | * |
| 170 | * b) global or semaphore sem_lock() for read/write: | 176 | * b) global or semaphore sem_lock() for read/write: |
| 171 | * sem_array.sem_base[i].pending_{const,alter}: | 177 | * sem_array.sem_base[i].pending_{const,alter}: |
| 172 | * sem_array.complex_mode (for read) | ||
| 173 | * | 178 | * |
| 174 | * c) special: | 179 | * c) special: |
| 175 | * sem_undo_list.list_proc: | 180 | * sem_undo_list.list_proc: |
| 176 | * * undo_list->lock for write | 181 | * * undo_list->lock for write |
| 177 | * * rcu for read | 182 | * * rcu for read |
| 183 | * use_global_lock: | ||
| 184 | * * global sem_lock() for write | ||
| 185 | * * either local or global sem_lock() for read. | ||
| 186 | * | ||
| 187 | * Memory ordering: | ||
| 188 | * Most ordering is enforced by using spin_lock() and spin_unlock(). | ||
| 189 | * The special case is use_global_lock: | ||
| 190 | * Setting it from non-zero to 0 is a RELEASE, this is ensured by | ||
| 191 | * using smp_store_release(). | ||
| 192 | * Testing if it is non-zero is an ACQUIRE, this is ensured by using | ||
| 193 | * smp_load_acquire(). | ||
| 194 | * Setting it from 0 to non-zero must be ordered with regards to | ||
| 195 | * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() | ||
| 196 | * is inside a spin_lock() and after a write from 0 to non-zero a | ||
| 197 | * spin_lock()+spin_unlock() is done. | ||
| 178 | */ | 198 | */ |
| 179 | 199 | ||
| 180 | #define sc_semmsl sem_ctls[0] | 200 | #define sc_semmsl sem_ctls[0] |
| @@ -273,12 +293,16 @@ static void complexmode_enter(struct sem_array *sma) | |||
| 273 | int i; | 293 | int i; |
| 274 | struct sem *sem; | 294 | struct sem *sem; |
| 275 | 295 | ||
| 276 | if (sma->complex_mode) { | 296 | if (sma->use_global_lock > 0) { |
| 277 | /* We are already in complex_mode. Nothing to do */ | 297 | /* |
| 298 | * We are already in global lock mode. | ||
| 299 | * Nothing to do, just reset the | ||
| 300 | * counter until we return to simple mode. | ||
| 301 | */ | ||
| 302 | sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; | ||
| 278 | return; | 303 | return; |
| 279 | } | 304 | } |
| 280 | 305 | sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; | |
| 281 | sma->complex_mode = true; | ||
| 282 | 306 | ||
| 283 | for (i = 0; i < sma->sem_nsems; i++) { | 307 | for (i = 0; i < sma->sem_nsems; i++) { |
| 284 | sem = sma->sem_base + i; | 308 | sem = sma->sem_base + i; |
| @@ -299,13 +323,17 @@ static void complexmode_tryleave(struct sem_array *sma) | |||
| 299 | */ | 323 | */ |
| 300 | return; | 324 | return; |
| 301 | } | 325 | } |
| 302 | /* | 326 | if (sma->use_global_lock == 1) { |
| 303 | * Immediately after setting complex_mode to false, | 327 | /* |
| 304 | * a simple op can start. Thus: all memory writes | 328 | * Immediately after setting use_global_lock to 0, |
| 305 | * performed by the current operation must be visible | 329 | * a simple op can start. Thus: all memory writes |
| 306 | * before we set complex_mode to false. | 330 | * performed by the current operation must be visible |
| 307 | */ | 331 | * before we set use_global_lock to 0. |
| 308 | smp_store_release(&sma->complex_mode, false); | 332 | */ |
| 333 | smp_store_release(&sma->use_global_lock, 0); | ||
| 334 | } else { | ||
| 335 | sma->use_global_lock--; | ||
| 336 | } | ||
| 309 | } | 337 | } |
| 310 | 338 | ||
| 311 | #define SEM_GLOBAL_LOCK (-1) | 339 | #define SEM_GLOBAL_LOCK (-1) |
| @@ -335,22 +363,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, | |||
| 335 | * Optimized locking is possible if no complex operation | 363 | * Optimized locking is possible if no complex operation |
| 336 | * is either enqueued or processed right now. | 364 | * is either enqueued or processed right now. |
| 337 | * | 365 | * |
| 338 | * Both facts are tracked by complex_mode. | 366 | * Both facts are tracked by use_global_mode. |
| 339 | */ | 367 | */ |
| 340 | sem = sma->sem_base + sops->sem_num; | 368 | sem = sma->sem_base + sops->sem_num; |
| 341 | 369 | ||
| 342 | /* | 370 | /* |
| 343 | * Initial check for complex_mode. Just an optimization, | 371 | * Initial check for use_global_lock. Just an optimization, |
| 344 | * no locking, no memory barrier. | 372 | * no locking, no memory barrier. |
| 345 | */ | 373 | */ |
| 346 | if (!sma->complex_mode) { | 374 | if (!sma->use_global_lock) { |
| 347 | /* | 375 | /* |
| 348 | * It appears that no complex operation is around. | 376 | * It appears that no complex operation is around. |
| 349 | * Acquire the per-semaphore lock. | 377 | * Acquire the per-semaphore lock. |
| 350 | */ | 378 | */ |
| 351 | spin_lock(&sem->lock); | 379 | spin_lock(&sem->lock); |
| 352 | 380 | ||
| 353 | if (!smp_load_acquire(&sma->complex_mode)) { | 381 | /* pairs with smp_store_release() */ |
| 382 | if (!smp_load_acquire(&sma->use_global_lock)) { | ||
| 354 | /* fast path successful! */ | 383 | /* fast path successful! */ |
| 355 | return sops->sem_num; | 384 | return sops->sem_num; |
| 356 | } | 385 | } |
| @@ -360,19 +389,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, | |||
| 360 | /* slow path: acquire the full lock */ | 389 | /* slow path: acquire the full lock */ |
| 361 | ipc_lock_object(&sma->sem_perm); | 390 | ipc_lock_object(&sma->sem_perm); |
| 362 | 391 | ||
| 363 | if (sma->complex_count == 0) { | 392 | if (sma->use_global_lock == 0) { |
| 364 | /* False alarm: | 393 | /* |
| 365 | * There is no complex operation, thus we can switch | 394 | * The use_global_lock mode ended while we waited for |
| 366 | * back to the fast path. | 395 | * sma->sem_perm.lock. Thus we must switch to locking |
| 396 | * with sem->lock. | ||
| 397 | * Unlike in the fast path, there is no need to recheck | ||
| 398 | * sma->use_global_lock after we have acquired sem->lock: | ||
| 399 | * We own sma->sem_perm.lock, thus use_global_lock cannot | ||
| 400 | * change. | ||
| 367 | */ | 401 | */ |
| 368 | spin_lock(&sem->lock); | 402 | spin_lock(&sem->lock); |
| 403 | |||
| 369 | ipc_unlock_object(&sma->sem_perm); | 404 | ipc_unlock_object(&sma->sem_perm); |
| 370 | return sops->sem_num; | 405 | return sops->sem_num; |
| 371 | } else { | 406 | } else { |
| 372 | /* Not a false alarm, thus complete the sequence for a | 407 | /* |
| 373 | * full lock. | 408 | * Not a false alarm, thus continue to use the global lock |
| 409 | * mode. No need for complexmode_enter(), this was done by | ||
| 410 | * the caller that has set use_global_mode to non-zero. | ||
| 374 | */ | 411 | */ |
| 375 | complexmode_enter(sma); | ||
| 376 | return SEM_GLOBAL_LOCK; | 412 | return SEM_GLOBAL_LOCK; |
| 377 | } | 413 | } |
| 378 | } | 414 | } |
| @@ -476,7 +512,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) | |||
| 476 | } | 512 | } |
| 477 | 513 | ||
| 478 | sma->complex_count = 0; | 514 | sma->complex_count = 0; |
| 479 | sma->complex_mode = true; /* dropped by sem_unlock below */ | 515 | sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; |
| 480 | INIT_LIST_HEAD(&sma->pending_alter); | 516 | INIT_LIST_HEAD(&sma->pending_alter); |
| 481 | INIT_LIST_HEAD(&sma->pending_const); | 517 | INIT_LIST_HEAD(&sma->pending_const); |
| 482 | INIT_LIST_HEAD(&sma->list_id); | 518 | INIT_LIST_HEAD(&sma->list_id); |
