author		Heiko Carstens <heiko.carstens@de.ibm.com>	2014-09-08 02:20:43 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2014-09-09 02:53:30 -0400
commit		442302820356977237e32a76a211e7942255003a
tree		caf9d4d00753beab556f28f4f4cfac3d7f8b73e9 /arch/s390
parent		3d1e220d08c6a00ffa83d39030b8162f66665b2b
s390/spinlock: optimize spin_unlock code
Use a memory barrier + store sequence instead of a load + compare-and-swap
sequence to unlock a spinlock and an rw lock.
For the spinlock case this saves two memory reads and an unneeded CPU
serialization after the compare-and-swap instruction has stored the new value.
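In other words (a minimal sketch distilled from the diff below; the _old/_new names are invented for contrast), the unlock fast path goes from a compare-and-swap, which has to read the old lock value and serializes the CPU again after its store, to a single serialization followed by a plain store:

	/* Old unlock: compare-and-swap the lock word from SPINLOCK_LOCKVAL
	 * back to 0. The cs instruction reads the lock word and serializes
	 * the CPU again *after* its store -- work the unlock does not need. */
	static inline void arch_spin_unlock_old(arch_spinlock_t *lp)
	{
		_raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
	}

	/* New unlock: serialize once *before* the store (bcr), then store 0
	 * directly; no read of the old value, no post-store serialization. */
	static inline void arch_spin_unlock_new(arch_spinlock_t *lp)
	{
		asm volatile(
			__ASM_BARRIER		/* "bcr 14,0\n" or "bcr 15,0\n" */
			"st	%1,%0\n"	/* plain 4-byte store of 0 */
			: "+Q" (lp->lock)
			: "d" (0)
			: "cc", "memory");
	}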
The kernel size (performance_defconfig) gets reduced by ~14k.
Average execution time of a tight inlined spin_unlock loop drops from
5.8ns to 0.7ns on a zEC12 machine.
An artificial stress test, in which several counters are protected by a
single spinlock and are incremented only while holding that spinlock,
shows a ~30% improvement on a 4-CPU machine.
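The stress test source is not part of the commit; a hypothetical sketch of the described workload (all names and the counter count are invented here) could look like:

	#include <linux/spinlock.h>

	#define NR_STRESS_COUNTERS 4	/* invented for this sketch */

	static DEFINE_SPINLOCK(stress_lock);
	static unsigned long stress_counters[NR_STRESS_COUNTERS];

	/* Each thread hammers the shared lock; the counter increment is tiny,
	 * so lock/unlock cost dominates and any unlock speedup shows directly. */
	static void stress_loop(unsigned int id, unsigned long iterations)
	{
		unsigned long i;

		for (i = 0; i < iterations; i++) {
			spin_lock(&stress_lock);
			stress_counters[id % NR_STRESS_COUNTERS]++;
			spin_unlock(&stress_lock);
		}
	}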
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390')
-rw-r--r--	arch/s390/include/asm/barrier.h		 6
-rw-r--r--	arch/s390/include/asm/spinlock.h	21
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 19ff956b752b..b5dce6544d76 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -15,11 +15,13 @@
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
 /* Fast-BCR without checkpoint synchronization */
-#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 14,0\n"
 #else
-#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 15,0\n"
 #endif
 
+#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
 #define rmb() mb()
 #define wmb() mb()
 #define read_barrier_depends() do { } while(0)
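The point of factoring the mnemonic out into __ASM_BARRIER is that a plain string literal can be concatenated into other inline assembly; mb() itself still expands to the same bcr as before. An illustrative fragment (this is exactly the pattern the spinlock change below uses):

	/* Adjacent string literals concatenate, so the barrier can be fused
	 * with further instructions inside a single asm statement: */
	asm volatile(
		__ASM_BARRIER		/* "bcr 14,0\n" or "bcr 15,0\n" */
		"st	%1,%0\n"	/* barrier immediately followed by the store */
		: "+Q" (lock)
		: "d" (0)
		: "cc", "memory");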
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 96879f7ad6da..d26ad2ac7cb2 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -64,11 +64,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
 		_raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
 }
 
-static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
-{
-	return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
-}
-
 static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
 	if (!arch_spin_trylock_once(lp))
@@ -91,7 +86,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
-	arch_spin_tryrelease_once(lp);
+	typecheck(unsigned int, lp->lock);
+	asm volatile(
+		__ASM_BARRIER
+		"st	%1,%0\n"
+		: "+Q" (lp->lock)
+		: "d" (0)
+		: "cc", "memory");
 }
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
@@ -179,7 +180,13 @@ static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	_raw_compare_and_swap(&rw->lock, 0x80000000, 0);
+	typecheck(unsigned int, rw->lock);
+	asm volatile(
+		__ASM_BARRIER
+		"st	%1,%0\n"
+		: "+Q" (rw->lock)
+		: "d" (0)
+		: "cc", "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
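For reference, the typecheck() used in both unlock paths is the generic kernel helper from include/linux/typecheck.h (reproduced from the kernel source, not part of this patch); it matters here because "st" stores exactly four bytes, so the lock word must really be an unsigned int:

	/* Compile-time type check: warns if x is not of the given type,
	 * evaluates to 1, and generates no code. */
	#define typecheck(type,x) \
	({	type __dummy; \
		typeof(x) __dummy2; \
		(void)(&__dummy == &__dummy2); \
		1; \
	})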