author		Will Deacon <will.deacon@arm.com>	2014-02-21 11:01:48 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2014-02-25 06:30:20 -0500
commit		c32ffce0f66e5d1d4856254516e24f5ef275cd00 (patch)
tree		125229cdd38bfd6e7e62cff7eb8771a34cc999a7 /arch/arm/lib
parent		6ea41c80115f49e7d8b80312ffc99973d283471f (diff)
ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics
After a bunch of benchmarking on the interaction between dmb and pldw,
it turns out that issuing the pldw *after* the dmb instruction can
give modest performance gains (~3% atomic_add_return improvement on a
dual A15).
This patch adds prefetchw invocations to our barriered atomic operations
including cmpxchg, test_and_xxx and futexes.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
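For illustration, here is a minimal C-level sketch of the pattern the commit message describes, modeled on the shape of the ARM atomic_add_return() routine: the full barrier is issued first, the prefetch-for-write (pldw, via prefetchw()) follows it, and only then does the ldrex/strex retry loop run. The function name and comments are illustrative; this is not the exact source touched by this patch.

#include <linux/types.h>	/* atomic_t */
#include <linux/prefetch.h>	/* prefetchw() */
#include <asm/barrier.h>	/* smp_mb() */

/*
 * Sketch only: shows barrier-then-pldw ordering ahead of the
 * exclusive-access loop, as measured in the commit message.
 */
static inline int example_atomic_add_return(int i, atomic_t *v)
{
	unsigned long tmp;
	int result;

	smp_mb();		/* barrier required by the _return variant */
	prefetchw(&v->counter);	/* pldw issued *after* the dmb */

	__asm__ __volatile__("@ example_atomic_add_return\n"
"1:	ldrex	%0, [%3]\n"	/* load-exclusive the counter */
"	add	%0, %0, %4\n"
"	strex	%1, %0, [%3]\n"	/* store-exclusive; %1 == 0 on success */
"	teq	%1, #0\n"
"	bne	1b"		/* retry if the exclusive store failed */
	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
	: "r" (&v->counter), "Ir" (i)
	: "cc");

	smp_mb();
	return result;
}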
Diffstat (limited to 'arch/arm/lib')
 arch/arm/lib/bitops.h | 5 +++++
 1 file changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 52886b89706c..9f12ed1eea86 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -37,6 +37,11 @@ UNWIND(	.fnstart	)
 	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3		@ create mask
 	smp_dmb
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 1:	ldrex	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
 	\instr	r2, r2, r3		@ toggle bit