aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2005-08-05 11:02:00 -0400
committerTony Luck <tony.luck@intel.com>2005-08-10 19:13:10 -0400
commitf521089158cd48a81b4d72e8e39da006dd79779b (patch)
treee5053eba17c84c64e2c86e67adefe56f3ef5ba10
parentbc68552faad0e134eb22281343d5ae5a4873fa80 (diff)
[IA64] Spinlock optimizations
1. Nontemporal store for spin unlock. A nontemporal store will not update the LRU setting for the cacheline. The cacheline with the lock may therefore be evicted faster from the cpu caches. Doing so may be useful since it increases the chance that the exclusive cache line has been evicted when another cpu is trying to acquire the lock. The time between dropping and reacquiring a lock on the same cpu is typically very small so the danger of the cacheline being evicted is negligible. 2. Avoid semaphore operation in write_unlock and use nontemporal store write_lock uses a cmpxchg like the regular spin_lock but write_unlock uses clear_bit which requires a load and then a loop over a cmpxchg. The following patch makes write_unlock simply use a nontemporal store to clear the highest 8 bits. We will then still have the lower 3 bytes (24 bits) left to count the readers. Doing the byte store will reduce the number of possible readers from 2^31 to 2^24 = 16 million. These patches were discussed already: http://marc.theaimsgroup.com/?t=111472054400001&r=1&w=2 http://marc.theaimsgroup.com/?l=linux-ia64&m=111401837707849&w=2 The nontemporal stores will only work using GCC. If a compiler is used that does not support inline asm then fallback C code is used. This will preserve the byte store but not be able to do the nontemporal stores. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--include/asm-ia64/spinlock.h33
1 files changed, 24 insertions, 9 deletions
diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h
index 909936f25512..d2430aa0d49d 100644
--- a/include/asm-ia64/spinlock.h
+++ b/include/asm-ia64/spinlock.h
@@ -93,7 +93,15 @@ _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
93# endif /* CONFIG_MCKINLEY */ 93# endif /* CONFIG_MCKINLEY */
94#endif 94#endif
95} 95}
96
96#define _raw_spin_lock(lock) _raw_spin_lock_flags(lock, 0) 97#define _raw_spin_lock(lock) _raw_spin_lock_flags(lock, 0)
98
99/* Unlock by doing an ordered store and releasing the cacheline with nta */
100static inline void _raw_spin_unlock(spinlock_t *x) {
101 barrier();
102 asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x));
103}
104
97#else /* !ASM_SUPPORTED */ 105#else /* !ASM_SUPPORTED */
98#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) 106#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
99# define _raw_spin_lock(x) \ 107# define _raw_spin_lock(x) \
@@ -109,16 +117,16 @@ do { \
109 } while (ia64_spinlock_val); \ 117 } while (ia64_spinlock_val); \
110 } \ 118 } \
111} while (0) 119} while (0)
120#define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
112#endif /* !ASM_SUPPORTED */ 121#endif /* !ASM_SUPPORTED */
113 122
114#define spin_is_locked(x) ((x)->lock != 0) 123#define spin_is_locked(x) ((x)->lock != 0)
115#define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
116#define _raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) 124#define _raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0)
117#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock) 125#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock)
118 126
119typedef struct { 127typedef struct {
120 volatile unsigned int read_counter : 31; 128 volatile unsigned int read_counter : 24;
121 volatile unsigned int write_lock : 1; 129 volatile unsigned int write_lock : 8;
122#ifdef CONFIG_PREEMPT 130#ifdef CONFIG_PREEMPT
123 unsigned int break_lock; 131 unsigned int break_lock;
124#endif 132#endif
@@ -174,6 +182,13 @@ do { \
174 (result == 0); \ 182 (result == 0); \
175}) 183})
176 184
185static inline void _raw_write_unlock(rwlock_t *x)
186{
187 u8 *y = (u8 *)x;
188 barrier();
189 asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" );
190}
191
177#else /* !ASM_SUPPORTED */ 192#else /* !ASM_SUPPORTED */
178 193
179#define _raw_write_lock(l) \ 194#define _raw_write_lock(l) \
@@ -195,14 +210,14 @@ do { \
195 (ia64_val == 0); \ 210 (ia64_val == 0); \
196}) 211})
197 212
213static inline void _raw_write_unlock(rwlock_t *x)
214{
215 barrier();
216 x->write_lock = 0;
217}
218
198#endif /* !ASM_SUPPORTED */ 219#endif /* !ASM_SUPPORTED */
199 220
200#define _raw_read_trylock(lock) generic_raw_read_trylock(lock) 221#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
201 222
202#define _raw_write_unlock(x) \
203({ \
204 smp_mb__before_clear_bit(); /* need barrier before releasing lock... */ \
205 clear_bit(31, (x)); \
206})
207
208#endif /* _ASM_IA64_SPINLOCK_H */ 223#endif /* _ASM_IA64_SPINLOCK_H */