aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Jake Moilanen <moilanen@austin.ibm.com> 2005-05-01 11:58:47 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-05-01 11:58:47 -0400
commit d637413f3f05b41f678f8004225b33b62274183f (patch)
tree 08a2240395ffef19a3d61ea7de4f55704f8046fe /include
parent d59dd4620fb8d6422555a9e2b82a707718e68327 (diff)
[PATCH] ppc64: reverse prediction on spinlock busy loop code
On our raw spinlocks, we currently have an attempt at the lock, and if we do not get it we enter a spin loop. This spin loop will likely continue for a while, and we predict likely. Shouldn't we predict that we will get out of the loop, so our next instructions are already prefetched? Even when we miss because the lock is still held, it won't matter since we are waiting anyway. I did a couple of quick benchmarks, but the results are inconclusive. 16-way 690 running specjbb with original code: # ./specjbb 3000 16 1 1 19 30 120 ... Valid run, Score is 59282. 16-way 690 running specjbb with unlikely code: # ./specjbb 3000 16 1 1 19 30 120 ... Valid run, Score is 59541. I saw a smaller increase on a JS20 (~1.6%). JS20 specjbb w/ original code: # ./specjbb 400 2 1 1 19 30 120 ... Valid run, Score is 20460. JS20 specjbb w/ unlikely code: # ./specjbb 400 2 1 1 19 30 120 ... Valid run, Score is 20803. Anton said: Mispredicting the spinlock busy loop also means we slow down the rate at which we do the loads, which can be good for heavily contended locks. Note: There are some gcc issues with our default build and branch prediction, but a CONFIG_POWER4_ONLY build should emit them correctly. I'm working with Alan Modra on it now. Signed-off-by: Jake Moilanen <moilanen@austin.ibm.com> Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include')
-rw-r--r-- include/asm-ppc64/spinlock.h 8
1 files changed, 4 insertions, 4 deletions
diff --git a/include/asm-ppc64/spinlock.h b/include/asm-ppc64/spinlock.h
index a9b2a1162cf7..acd11564dd75 100644
--- a/include/asm-ppc64/spinlock.h
+++ b/include/asm-ppc64/spinlock.h
@@ -110,7 +110,7 @@ static void __inline__ _raw_spin_lock(spinlock_t *lock)
110 HMT_low(); 110 HMT_low();
111 if (SHARED_PROCESSOR) 111 if (SHARED_PROCESSOR)
112 __spin_yield(lock); 112 __spin_yield(lock);
113 } while (likely(lock->lock != 0)); 113 } while (unlikely(lock->lock != 0));
114 HMT_medium(); 114 HMT_medium();
115 } 115 }
116} 116}
@@ -128,7 +128,7 @@ static void __inline__ _raw_spin_lock_flags(spinlock_t *lock, unsigned long flag
128 HMT_low(); 128 HMT_low();
129 if (SHARED_PROCESSOR) 129 if (SHARED_PROCESSOR)
130 __spin_yield(lock); 130 __spin_yield(lock);
131 } while (likely(lock->lock != 0)); 131 } while (unlikely(lock->lock != 0));
132 HMT_medium(); 132 HMT_medium();
133 local_irq_restore(flags_dis); 133 local_irq_restore(flags_dis);
134 } 134 }
@@ -194,7 +194,7 @@ static void __inline__ _raw_read_lock(rwlock_t *rw)
194 HMT_low(); 194 HMT_low();
195 if (SHARED_PROCESSOR) 195 if (SHARED_PROCESSOR)
196 __rw_yield(rw); 196 __rw_yield(rw);
197 } while (likely(rw->lock < 0)); 197 } while (unlikely(rw->lock < 0));
198 HMT_medium(); 198 HMT_medium();
199 } 199 }
200} 200}
@@ -251,7 +251,7 @@ static void __inline__ _raw_write_lock(rwlock_t *rw)
251 HMT_low(); 251 HMT_low();
252 if (SHARED_PROCESSOR) 252 if (SHARED_PROCESSOR)
253 __rw_yield(rw); 253 __rw_yield(rw);
254 } while (likely(rw->lock != 0)); 254 } while (unlikely(rw->lock != 0));
255 HMT_medium(); 255 HMT_medium();
256 } 256 }
257} 257}