author		Russell King <rmk+kernel@arm.linux.org.uk>	2013-10-23 18:38:28 -0400
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2013-10-23 18:38:28 -0400
commit		bdbf0a4cf2af7867dfdd09ed1d0e381eac188551 (patch)
tree		75d6094e6a75452133caf6caaec73e2108815f2c /arch
parent		901e7e34f83fc8fb4ce82cb29e5b500b7559cd86 (diff)
parent		d779c07dd72098a7416d907494f958213b7726f3 (diff)
Merge branch 'for-rmk/prefetch' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into devel-stable
Diffstat (limited to 'arch')
-rw-r--r--	arch/arm/include/asm/atomic.h		7
-rw-r--r--	arch/arm/include/asm/processor.h	33
-rw-r--r--	arch/arm/include/asm/spinlock.h		28
-rw-r--r--	arch/arm/include/asm/spinlock_types.h	2
-rw-r--r--	arch/arm/include/asm/unified.h		4
-rw-r--r--	arch/arm/lib/bitops.h			5
6 files changed, 56 insertions, 23 deletions
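
The hunks below share one idea: before entering an ldrex/strex retry loop, issue a write hint so the CPU already holds the cache line in a writable state; on ARMv7 SMP this becomes pldw, elsewhere it falls back to pld or nothing. As a rough stand-alone illustration only (not code from this merge; it assumes GCC's __builtin_prefetch(p, 1) emits a write prefetch such as pldw on an ARMv7-A MP target, and counter_add is a made-up helper):

/* Illustrative sketch of "prefetch for write, then retry an atomic
 * read-modify-write", mirroring prefetchw() before ldrex/strex below. */
#include <stdatomic.h>
#include <stdio.h>

static void counter_add(atomic_int *v, int i)
{
	/* Write hint: pull the line in before the CAS loop,
	 * analogous to prefetchw(&v->counter) in the patch. */
	__builtin_prefetch(v, 1);

	int old = atomic_load_explicit(v, memory_order_relaxed);
	while (!atomic_compare_exchange_weak_explicit(v, &old, old + i,
						      memory_order_relaxed,
						      memory_order_relaxed))
		;	/* retry, like the strex/bne loop */
}

int main(void)
{
	atomic_int counter = 0;

	counter_add(&counter, 5);
	printf("%d\n", atomic_load(&counter));	/* prints 5 */
	return 0;
}

The kernel hunks place prefetchw() directly in front of each exclusive-load loop, which is cheap on the fast path and avoids an extra coherence round trip when the store-exclusive follows.
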
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index da1c77d39327..55ffc3b850f4 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,6 +12,7 @@
 #define __ASM_ARM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/prefetch.h>
 #include <linux/types.h>
 #include <linux/irqflags.h>
 #include <asm/barrier.h>
@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_add\n"
 "1:	ldrex	%0, [%3]\n"
 "	add	%0, %0, %4\n"
@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_sub\n"
 "1:	ldrex	%0, [%3]\n"
 "	sub	%0, %0, %4\n"
@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(addr);
 	__asm__ __volatile__("@ atomic_clear_mask\n"
 "1:	ldrex	%0, [%3]\n"
 "	bic	%0, %0, %4\n"
@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
 {
 	u64 tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_set\n"
 "1:	ldrexd	%0, %H0, [%2]\n"
 "	strexd	%0, %3, %H3, [%2]\n"
@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_add\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	adds	%0, %0, %4\n"
@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_sub\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	subs	%0, %0, %4\n"
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 413f3876341c..c3d5fc124a05 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -22,6 +22,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
+#include <asm/unified.h>
 
 #ifdef __KERNEL__
 #define STACK_TOP	((current->personality & ADDR_LIMIT_32BIT) ? \
@@ -87,6 +88,17 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)	task_pt_regs(tsk)->ARM_pc
 #define KSTK_ESP(tsk)	task_pt_regs(tsk)->ARM_sp
 
+#ifdef CONFIG_SMP
+#define __ALT_SMP_ASM(smp, up)						\
+	"9998:	" smp "\n"						\
+	"	.pushsection \".alt.smp.init\", \"a\"\n"		\
+	"	.long	9998b\n"					\
+	"	" up "\n"						\
+	"	.popsection\n"
+#else
+#define __ALT_SMP_ASM(smp, up)	up
+#endif
+
 /*
  * Prefetching support - only ARMv5.
  */
@@ -97,17 +109,22 @@ static inline void prefetch(const void *ptr)
 {
 	__asm__ __volatile__(
 		"pld\t%a0"
-		:
-		: "p" (ptr)
-		: "cc");
+		:: "p" (ptr));
 }
 
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
 #define ARCH_HAS_PREFETCHW
-#define prefetchw(ptr)	prefetch(ptr)
-
-#define ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) do { } while (0)
-
+static inline void prefetchw(const void *ptr)
+{
+	__asm__ __volatile__(
+		".arch_extension	mp\n"
+		__ALT_SMP_ASM(
+			WASM(pldw)		"\t%a0",
+			WASM(pld)		"\t%a0"
+		)
+		:: "p" (ptr));
+}
+#endif
 #endif
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 4f2c28060c9a..499900781d59 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -5,21 +5,13 @@
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#include <asm/processor.h>
+#include <linux/prefetch.h>
 
 /*
  * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
  * extensions, so when running on UP, we have to patch these instructions away.
  */
-#define ALT_SMP(smp, up)					\
-	"9998:	" smp "\n"					\
-	"	.pushsection \".alt.smp.init\", \"a\"\n"	\
-	"	.long	9998b\n"				\
-	"	" up "\n"					\
-	"	.popsection\n"
-
 #ifdef CONFIG_THUMB2_KERNEL
-#define SEV		ALT_SMP("sev.w", "nop.w")
 /*
  * For Thumb-2, special care is needed to ensure that the conditional WFE
  * instruction really does assemble to exactly 4 bytes (as required by
@@ -31,17 +23,18 @@
  * the assembler won't change IT instructions which are explicitly present
  * in the input.
  */
-#define WFE(cond)	ALT_SMP(		\
+#define WFE(cond)	__ALT_SMP_ASM(		\
 	"it " cond "\n\t"			\
 	"wfe" cond ".n",			\
 						\
 	"nop.w"					\
 )
 #else
-#define SEV		ALT_SMP("sev", "nop")
-#define WFE(cond)	ALT_SMP("wfe" cond, "nop")
+#define WFE(cond)	__ALT_SMP_ASM("wfe" cond, "nop")
 #endif
 
+#define SEV		__ALT_SMP_ASM(WASM(sev), WASM(nop))
+
 static inline void dsb_sev(void)
 {
 #if __LINUX_ARM_ARCH__ >= 7
@@ -77,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	u32 newval;
 	arch_spinlock_t lockval;
 
+	prefetchw(&lock->slock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%3]\n"
 "	add	%1, %0, %4\n"
@@ -100,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	unsigned long contended, res;
 	u32 slock;
 
+	prefetchw(&lock->slock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%3]\n"
@@ -152,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
@@ -170,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -203,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 }
 
 /* write_can_lock - would write_trylock() succeed? */
-#define arch_write_can_lock(x)		((x)->lock == 0)
+#define arch_write_can_lock(x)		(ACCESS_ONCE((x)->lock) == 0)
 
 /*
  * Read locks are a bit more hairy:
@@ -221,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
@@ -241,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 	smp_mb();
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	sub	%0, %0, #1\n"
@@ -259,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -280,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 }
 
 /* read_can_lock - would read_trylock() succeed? */
-#define arch_read_can_lock(x)		((x)->lock < 0x80000000)
+#define arch_read_can_lock(x)		(ACCESS_ONCE((x)->lock) < 0x80000000)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index b262d2f8b478..47663fcb10ad 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -25,7 +25,7 @@ typedef struct {
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
-	volatile unsigned int lock;
+	u32 lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index f5989f46b4d2..b88beaba6b4a 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -38,6 +38,8 @@
 #ifdef __ASSEMBLY__
 #define W(instr)	instr.w
 #define BSYM(sym)	sym + 1
+#else
+#define WASM(instr)	#instr ".w"
 #endif
 
 #else	/* !CONFIG_THUMB2_KERNEL */
@@ -50,6 +52,8 @@
 #ifdef __ASSEMBLY__
 #define W(instr)	instr
 #define BSYM(sym)	sym
+#else
+#define WASM(instr)	#instr
 #endif
 
 #endif	/* CONFIG_THUMB2_KERNEL */
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index d6408d1ee543..e0c68d5bb7dc 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -10,6 +10,11 @@ UNWIND( .fnstart )
 	and	r3, r0, #31		@ Get bit offset
 	mov	r0, r0, lsr #5
 	add	r1, r1, r0, lsl #2	@ Get word offset
+#if __LINUX_ARM_ARCH__ >= 7
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 	mov	r3, r2, lsl r3
 1:	ldrex	r2, [r1]
 	\instr	r2, r2, r3