aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/entry_64.S
diff options
context:
space:
mode:
author Anton Blanchard <anton@samba.org> 2010-08-10 21:40:27 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2010-09-02 00:07:30 -0400
commit f89451fbd2b9f28f5ff156154989599ec062354b (patch)
tree 1722b247079fb80f972449066c9f5c67a5564d4c /arch/powerpc/kernel/entry_64.S
parent 8c77391475bc3284a380fc46aaf0bcf26bde3ae6 (diff)
powerpc: Feature nop out reservation clear when stcx checks address
The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the stcx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel/entry_64.S')
-rw-r--r--arch/powerpc/kernel/entry_64.S22
1 files changed, 22 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 42e9d908914a..4d5fa12ca6e8 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -202,7 +202,9 @@ syscall_exit:
202 bge- syscall_error 202 bge- syscall_error
203syscall_error_cont: 203syscall_error_cont:
204 ld r7,_NIP(r1) 204 ld r7,_NIP(r1)
205BEGIN_FTR_SECTION
205 stdcx. r0,0,r1 /* to clear the reservation */ 206 stdcx. r0,0,r1 /* to clear the reservation */
207END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
206 andi. r6,r8,MSR_PR 208 andi. r6,r8,MSR_PR
207 ld r4,_LINK(r1) 209 ld r4,_LINK(r1)
208 /* 210 /*
@@ -419,6 +421,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
419 sync 421 sync
420#endif /* CONFIG_SMP */ 422#endif /* CONFIG_SMP */
421 423
424 /*
425 * If we optimise away the clear of the reservation in system
426 * calls because we know the CPU tracks the address of the
427 * reservation, then we need to clear it here to cover the
428 * case that the kernel context switch path has no larx
429 * instructions.
430 */
431BEGIN_FTR_SECTION
432 ldarx r6,0,r1
433END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
434
422 addi r6,r4,-THREAD /* Convert THREAD to 'current' */ 435 addi r6,r4,-THREAD /* Convert THREAD to 'current' */
423 std r6,PACACURRENT(r13) /* Set new 'current' */ 436 std r6,PACACURRENT(r13) /* Set new 'current' */
424 437
@@ -576,7 +589,16 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
576 andi. r0,r3,MSR_RI 589 andi. r0,r3,MSR_RI
577 beq- unrecov_restore 590 beq- unrecov_restore
578 591
592 /*
593 * Clear the reservation. If we know the CPU tracks the address of
594 * the reservation then we can potentially save some cycles and use
595 * a larx. On POWER6 and POWER7 this is significantly faster.
596 */
597BEGIN_FTR_SECTION
579 stdcx. r0,0,r1 /* to clear the reservation */ 598 stdcx. r0,0,r1 /* to clear the reservation */
599FTR_SECTION_ELSE
600 ldarx r4,0,r1
601ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
580 602
581 /* 603 /*
582 * Clear RI before restoring r13. If we are returning to 604 * Clear RI before restoring r13. If we are returning to