aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2010-02-09 20:10:25 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2010-02-16 22:03:16 -0500
commit5a0e9b5718d921f5d8e17176d6b483f6b8f1844a (patch)
treece0e0eab2d6fc06d4215c974ead0d98fe85d73f0
parent53eae2281ad2607fa66a8ad1cb06186c8900da56 (diff)
powerpc: Use lwsync for acquire barrier if CPU supports it
Nick Piggin discovered that lwsync barriers around locks were faster than isync on 970. That was a long time ago and I completely dropped the ball in testing his patches across other ppc64 processors. Turns out the idea helps on other chips. Using a microbenchmark that uses a lot of threads to contend on a global pthread mutex (and therefore a global futex), POWER6 improves 8% and POWER7 improves 2%. I checked POWER5 and while I couldn't measure an improvement, there was no regression. This patch uses the lwsync patching code to replace the isyncs with lwsyncs on CPUs that support the instruction. We were marking POWER3 and RS64 as lwsync capable but in reality they treat it as a full sync (ie slow). Remove the CPU_FTR_LWSYNC bit from these CPUs so they continue to use the faster isync method. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/cputable.h4
-rw-r--r--arch/powerpc/include/asm/synch.h6
2 files changed, 7 insertions, 3 deletions
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 80f315e8a421..abb833b0e58f 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -381,9 +381,9 @@ extern const char *powerpc_base_platform;
381#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) 381#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
382 382
383/* 64-bit CPUs */ 383/* 64-bit CPUs */
384#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 384#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \
385 CPU_FTR_IABR | CPU_FTR_PPC_LE) 385 CPU_FTR_IABR | CPU_FTR_PPC_LE)
386#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 386#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \
387 CPU_FTR_IABR | \ 387 CPU_FTR_IABR | \
388 CPU_FTR_MMCRA | CPU_FTR_CTRL) 388 CPU_FTR_MMCRA | CPU_FTR_CTRL)
389#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 389#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index 5db1f0d5ea82..d7cab44643c5 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -37,7 +37,11 @@ static inline void isync(void)
37#endif 37#endif
38 38
39#ifdef CONFIG_SMP 39#ifdef CONFIG_SMP
40#define PPC_ACQUIRE_BARRIER "\n\tisync\n" 40#define __PPC_ACQUIRE_BARRIER \
41 START_LWSYNC_SECTION(97); \
42 isync; \
43 MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
44#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
41#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" 45#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n"
42#else 46#else
43#define PPC_ACQUIRE_BARRIER 47#define PPC_ACQUIRE_BARRIER