diff options
author | Nick Piggin <npiggin@suse.de> | 2008-11-11 12:54:15 -0500 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2008-11-19 00:04:58 -0500 |
commit | ae564c63b8311fa73c21e456e00dba1f4b1ff6bc (patch) | |
tree | 743ed995f8718d465c8008a61755ea47feb73f6e /arch/powerpc/include | |
parent | 957ab07b44d839ee8267e827fc4e8f1853798f57 (diff) |
powerpc: Optimise mutex
This implements an optimised mutex fastpath for powerpc, making use of
acquire and release barrier semantics. This takes the mutex
lock+unlock benchmark from 203 to 173 cycles on a G5.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/include')
-rw-r--r-- | arch/powerpc/include/asm/mutex.h | 135 |
1 files changed, 130 insertions, 5 deletions
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 458c1f7fbc18..dabc01c727b8 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h | |||
@@ -1,9 +1,134 @@ | |||
1 | /* | 1 | /* |
2 | * Pull in the generic implementation for the mutex fastpath. | 2 | * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm |
3 | */ | ||
4 | #ifndef _ASM_POWERPC_MUTEX_H | ||
5 | #define _ASM_POWERPC_MUTEX_H | ||
6 | |||
7 | static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new) | ||
8 | { | ||
9 | int t; | ||
10 | |||
11 | __asm__ __volatile__ ( | ||
12 | "1: lwarx %0,0,%1 # mutex trylock\n\ | ||
13 | cmpw 0,%0,%2\n\ | ||
14 | bne- 2f\n" | ||
15 | PPC405_ERR77(0,%1) | ||
16 | " stwcx. %3,0,%1\n\ | ||
17 | bne- 1b" | ||
18 | ISYNC_ON_SMP | ||
19 | "\n\ | ||
20 | 2:" | ||
21 | : "=&r" (t) | ||
22 | : "r" (&v->counter), "r" (old), "r" (new) | ||
23 | : "cc", "memory"); | ||
24 | |||
25 | return t; | ||
26 | } | ||
27 | |||
28 | static inline int __mutex_dec_return_lock(atomic_t *v) | ||
29 | { | ||
30 | int t; | ||
31 | |||
32 | __asm__ __volatile__( | ||
33 | "1: lwarx %0,0,%1 # mutex lock\n\ | ||
34 | addic %0,%0,-1\n" | ||
35 | PPC405_ERR77(0,%1) | ||
36 | " stwcx. %0,0,%1\n\ | ||
37 | bne- 1b" | ||
38 | ISYNC_ON_SMP | ||
39 | : "=&r" (t) | ||
40 | : "r" (&v->counter) | ||
41 | : "cc", "memory"); | ||
42 | |||
43 | return t; | ||
44 | } | ||
45 | |||
46 | static inline int __mutex_inc_return_unlock(atomic_t *v) | ||
47 | { | ||
48 | int t; | ||
49 | |||
50 | __asm__ __volatile__( | ||
51 | LWSYNC_ON_SMP | ||
52 | "1: lwarx %0,0,%1 # mutex unlock\n\ | ||
53 | addic %0,%0,1\n" | ||
54 | PPC405_ERR77(0,%1) | ||
55 | " stwcx. %0,0,%1 \n\ | ||
56 | bne- 1b" | ||
57 | : "=&r" (t) | ||
58 | : "r" (&v->counter) | ||
59 | : "cc", "memory"); | ||
60 | |||
61 | return t; | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * __mutex_fastpath_lock - try to take the lock by moving the count | ||
66 | * from 1 to a 0 value | ||
67 | * @count: pointer of type atomic_t | ||
68 | * @fail_fn: function to call if the original value was not 1 | ||
69 | * | ||
70 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if | ||
71 | * it wasn't 1 originally. This function MUST leave the value lower than | ||
72 | * 1 even when the "1" assertion wasn't true. | ||
73 | */ | ||
74 | static inline void | ||
75 | __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) | ||
76 | { | ||
77 | if (unlikely(__mutex_dec_return_lock(count) < 0)) | ||
78 | fail_fn(count); | ||
79 | } | ||
80 | |||
81 | /** | ||
82 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count | ||
83 | * from 1 to a 0 value | ||
84 | * @count: pointer of type atomic_t | ||
85 | * @fail_fn: function to call if the original value was not 1 | ||
86 | * | ||
87 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if | ||
88 | * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, | ||
89 | * or anything the slow path function returns. | ||
90 | */ | ||
91 | static inline int | ||
92 | __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) | ||
93 | { | ||
94 | if (unlikely(__mutex_dec_return_lock(count) < 0)) | ||
95 | return fail_fn(count); | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /** | ||
100 | * __mutex_fastpath_unlock - try to promote the count from 0 to 1 | ||
101 | * @count: pointer of type atomic_t | ||
102 | * @fail_fn: function to call if the original value was not 0 | ||
103 | * | ||
104 | * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>. | ||
105 | * In the failure case, this function is allowed to either set the value to | ||
106 | * 1, or to set it to a value lower than 1. | ||
107 | */ | ||
108 | static inline void | ||
109 | __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) | ||
110 | { | ||
111 | if (unlikely(__mutex_inc_return_unlock(count) <= 0)) | ||
112 | fail_fn(count); | ||
113 | } | ||
114 | |||
115 | #define __mutex_slowpath_needs_to_unlock() 1 | ||
116 | |||
117 | /** | ||
118 | * __mutex_fastpath_trylock - try to acquire the mutex, without waiting | ||
119 | * | ||
120 | * @count: pointer of type atomic_t | ||
121 | * @fail_fn: fallback function | ||
3 | * | 122 | * |
4 | * TODO: implement optimized primitives instead, or leave the generic | 123 | * Change the count from 1 to 0, and return 1 (success), or if the count |
5 | * implementation in place, or pick the atomic_xchg() based generic | 124 | * was not 1, then return 0 (failure). |
6 | * implementation. (see asm-generic/mutex-xchg.h for details) | ||
7 | */ | 125 | */ |
126 | static inline int | ||
127 | __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) | ||
128 | { | ||
129 | if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) | ||
130 | return 1; | ||
131 | return 0; | ||
132 | } | ||
8 | 133 | ||
9 | #include <asm-generic/mutex-dec.h> | 134 | #endif |