aboutsummaryrefslogtreecommitdiffstats
path: root/include/asm-generic/mutex-dec.h
diff options
context:
space:
mode:
author Nick Piggin <npiggin@suse.de> 2008-10-21 04:59:15 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-10-23 12:18:20 -0400
commit a8ddac7e53e89cb877965097d05adfeb1c91def3 (patch)
tree db4ee686e50f7fb57b0cef20e0a8e7f06151e317 /include/asm-generic/mutex-dec.h
parent 5a439c565799cb8d290d71ce375e86be64d43a4b (diff)
mutex: speed up generic mutex implementations
- atomic operations which both modify the variable and return something imply full smp memory barriers before and after the memory operations involved (failing atomic_cmpxchg, atomic_add_unless, etc. don't imply a barrier because they don't modify the target). See Documentation/atomic_ops.txt. So remove extra barriers and branches.
- All architectures support atomic_cmpxchg. This has no relation to __HAVE_ARCH_CMPXCHG. We can just take the atomic_cmpxchg path unconditionally.

This reduces a simple single-threaded fastpath lock+unlock test from 590 cycles to 203 cycles on a ppc970 system.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/asm-generic/mutex-dec.h')
-rw-r--r-- include/asm-generic/mutex-dec.h 26
1 files changed, 2 insertions, 24 deletions
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
index ed108be6743f..f104af7cf437 100644
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -22,8 +22,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
22{ 22{
23 if (unlikely(atomic_dec_return(count) < 0)) 23 if (unlikely(atomic_dec_return(count) < 0))
24 fail_fn(count); 24 fail_fn(count);
25 else
26 smp_mb();
27} 25}
28 26
29/** 27/**
@@ -41,10 +39,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
41{ 39{
42 if (unlikely(atomic_dec_return(count) < 0)) 40 if (unlikely(atomic_dec_return(count) < 0))
43 return fail_fn(count); 41 return fail_fn(count);
44 else { 42 return 0;
45 smp_mb();
46 return 0;
47 }
48} 43}
49 44
50/** 45/**
@@ -63,7 +58,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
63static inline void 58static inline void
64__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) 59__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
65{ 60{
66 smp_mb();
67 if (unlikely(atomic_inc_return(count) <= 0)) 61 if (unlikely(atomic_inc_return(count) <= 0))
68 fail_fn(count); 62 fail_fn(count);
69} 63}
@@ -88,25 +82,9 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
88static inline int 82static inline int
89__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) 83__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
90{ 84{
91 /* 85 if (likely(atomic_cmpxchg(count, 1, 0) == 1))
92 * We have two variants here. The cmpxchg based one is the best one
93 * because it never induce a false contention state. It is included
94 * here because architectures using the inc/dec algorithms over the
95 * xchg ones are much more likely to support cmpxchg natively.
96 *
97 * If not we fall back to the spinlock based variant - that is
98 * just as efficient (and simpler) as a 'destructive' probing of
99 * the mutex state would be.
100 */
101#ifdef __HAVE_ARCH_CMPXCHG
102 if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
103 smp_mb();
104 return 1; 86 return 1;
105 }
106 return 0; 87 return 0;
107#else
108 return fail_fn(count);
109#endif
110} 88}
111 89
112#endif 90#endif