aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/include/asm/cputable.h
diff options
context:
space:
mode:
author: Anton Blanchard <anton@samba.org> 2010-08-10 21:40:27 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2010-09-02 00:07:30 -0400
commit: f89451fbd2b9f28f5ff156154989599ec062354b (patch)
tree: 1722b247079fb80f972449066c9f5c67a5564d4c /arch/powerpc/include/asm/cputable.h
parent: 8c77391475bc3284a380fc46aaf0bcf26bde3ae6 (diff)
powerpc: Feature nop out reservation clear when stcx checks address
The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/include/asm/cputable.h')
-rw-r--r-- arch/powerpc/include/asm/cputable.h 14
1 files changed, 9 insertions, 5 deletions
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 3a40a992e594..f3a1fdd9cf08 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -198,6 +198,7 @@ extern const char *powerpc_base_platform;
198#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000) 198#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000)
199#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000) 199#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000)
200#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000) 200#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000)
201#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000)
201 202
202#ifndef __ASSEMBLY__ 203#ifndef __ASSEMBLY__
203 204
@@ -392,28 +393,31 @@ extern const char *powerpc_base_platform;
392 CPU_FTR_MMCRA | CPU_FTR_CTRL) 393 CPU_FTR_MMCRA | CPU_FTR_CTRL)
393#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 394#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
394 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 395 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
395 CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ) 396 CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
397 CPU_FTR_STCX_CHECKS_ADDRESS)
396#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 398#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
397 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 399 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
398 CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ 400 CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
399 CPU_FTR_CP_USE_DCBTZ) 401 CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS)
400#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 402#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
401 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 403 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
402 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 404 CPU_FTR_MMCRA | CPU_FTR_SMT | \
403 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ 405 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
404 CPU_FTR_PURR) 406 CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS)
405#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 407#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
406 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 408 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
407 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 409 CPU_FTR_MMCRA | CPU_FTR_SMT | \
408 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ 410 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
409 CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ 411 CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
410 CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD) 412 CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
413 CPU_FTR_STCX_CHECKS_ADDRESS)
411#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 414#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
412 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 415 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
413 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 416 CPU_FTR_MMCRA | CPU_FTR_SMT | \
414 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ 417 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
415 CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ 418 CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
416 CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT) 419 CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
420 CPU_FTR_STCX_CHECKS_ADDRESS)
417#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 421#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
418 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 422 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
419 CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ 423 CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \