author		Russell King <rmk+kernel@arm.linux.org.uk>	2013-10-23 18:38:28 -0400
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2013-10-23 18:38:28 -0400
commit		bdbf0a4cf2af7867dfdd09ed1d0e381eac188551 (patch)
tree		75d6094e6a75452133caf6caaec73e2108815f2c /arch
parent		901e7e34f83fc8fb4ce82cb29e5b500b7559cd86 (diff)
parent		d779c07dd72098a7416d907494f958213b7726f3 (diff)
Merge branch 'for-rmk/prefetch' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into devel-stable
Diffstat (limited to 'arch')
-rw-r--r--	arch/arm/include/asm/atomic.h		 7
-rw-r--r--	arch/arm/include/asm/processor.h	33
-rw-r--r--	arch/arm/include/asm/spinlock.h		28
-rw-r--r--	arch/arm/include/asm/spinlock_types.h	 2
-rw-r--r--	arch/arm/include/asm/unified.h		 4
-rw-r--r--	arch/arm/lib/bitops.h			 5
6 files changed, 56 insertions, 23 deletions
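
The common thread in the hunks below is a write-prefetch hint (ARMv7's pldw, via prefetchw()) issued just before each ldrex/strex retry loop, so the cache line is already held in a writable state when the exclusive access runs. As a rough C-level sketch of the same idea — using GCC/Clang's __builtin_prefetch and a made-up helper name, neither of which appears in this patch:

    #include <stdatomic.h>

    /* Sketch only: hint that the counter's cache line is about to be
     * written, then do the atomic read-modify-write.  The second argument
     * of __builtin_prefetch (1) means "prefetch for write", which is
     * roughly what prefetchw()/pldw provides on ARMv7 MP systems. */
    static void example_atomic_add(atomic_int *v, int i)
    {
            __builtin_prefetch(v, 1);
            atomic_fetch_add_explicit(v, i, memory_order_relaxed);
    }
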
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index da1c77d39327..55ffc3b850f4 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,6 +12,7 @@
 #define __ASM_ARM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/prefetch.h>
 #include <linux/types.h>
 #include <linux/irqflags.h>
 #include <asm/barrier.h>
@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_add\n"
 "1:	ldrex	%0, [%3]\n"
 "	add	%0, %0, %4\n"
@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_sub\n"
 "1:	ldrex	%0, [%3]\n"
 "	sub	%0, %0, %4\n"
@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(addr);
 	__asm__ __volatile__("@ atomic_clear_mask\n"
 "1:	ldrex	%0, [%3]\n"
 "	bic	%0, %0, %4\n"
@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
 {
 	u64 tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_set\n"
 "1:	ldrexd	%0, %H0, [%2]\n"
 "	strexd	%0, %3, %H3, [%2]\n"
@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_add\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	adds	%0, %0, %4\n"
@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_sub\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	subs	%0, %0, %4\n"
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 413f3876341c..c3d5fc124a05 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -22,6 +22,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
+#include <asm/unified.h>
 
 #ifdef __KERNEL__
 #define STACK_TOP	((current->personality & ADDR_LIMIT_32BIT) ? \
@@ -87,6 +88,17 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)	task_pt_regs(tsk)->ARM_pc
 #define KSTK_ESP(tsk)	task_pt_regs(tsk)->ARM_sp
 
+#ifdef CONFIG_SMP
+#define __ALT_SMP_ASM(smp, up)					\
+	"9998:	" smp "\n"					\
+	"	.pushsection \".alt.smp.init\", \"a\"\n"	\
+	"	.long	9998b\n"				\
+	"	" up "\n"					\
+	"	.popsection\n"
+#else
+#define __ALT_SMP_ASM(smp, up)	up
+#endif
+
 /*
  * Prefetching support - only ARMv5.
  */
@@ -97,17 +109,22 @@ static inline void prefetch(const void *ptr)
 {
 	__asm__ __volatile__(
 		"pld\t%a0"
-		:
-		: "p" (ptr)
-		: "cc");
+		:: "p" (ptr));
 }
 
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
 #define ARCH_HAS_PREFETCHW
-#define prefetchw(ptr)	prefetch(ptr)
-
-#define ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) do { } while (0)
-
+static inline void prefetchw(const void *ptr)
+{
+	__asm__ __volatile__(
+		".arch_extension mp\n"
+		__ALT_SMP_ASM(
+			WASM(pldw)	"\t%a0",
+			WASM(pld)	"\t%a0"
+		)
+		:: "p" (ptr));
+}
+#endif
 #endif
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
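
For reference (not part of the diff): with CONFIG_SMP and CONFIG_THUMB2_KERNEL both enabled, the new prefetchw() above expands to roughly the following — the SMP-capable pldw is emitted at the call site, and the uniprocessor fallback pld is recorded in the .alt.smp.init section so it can be patched in at boot on UP machines:

    /* Approximate preprocessor expansion of prefetchw(); illustrative,
     * not verbatim compiler output. */
    static inline void prefetchw_expanded(const void *ptr)
    {
            __asm__ __volatile__(
                    ".arch_extension mp\n"
                    "9998:\tpldw.w\t%a0\n"
                    "\t.pushsection \".alt.smp.init\", \"a\"\n"
                    "\t.long\t9998b\n"
                    "\tpld.w\t%a0\n"
                    "\t.popsection\n"
                    :: "p" (ptr));
    }
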
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 4f2c28060c9a..499900781d59 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -5,21 +5,13 @@
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#include <asm/processor.h>
+#include <linux/prefetch.h>
 
 /*
  * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K
  * extensions, so when running on UP, we have to patch these instructions away.
  */
-#define ALT_SMP(smp, up)					\
-	"9998:	" smp "\n"					\
-	"	.pushsection \".alt.smp.init\", \"a\"\n"	\
-	"	.long	9998b\n"				\
-	"	" up "\n"					\
-	"	.popsection\n"
-
 #ifdef CONFIG_THUMB2_KERNEL
-#define SEV		ALT_SMP("sev.w", "nop.w")
 /*
  * For Thumb-2, special care is needed to ensure that the conditional WFE
  * instruction really does assemble to exactly 4 bytes (as required by
@@ -31,17 +23,18 @@
  * the assembler won't change IT instructions which are explicitly present
  * in the input.
  */
-#define WFE(cond)	ALT_SMP(		\
+#define WFE(cond)	__ALT_SMP_ASM(		\
 	"it " cond "\n\t"			\
 	"wfe" cond ".n",			\
 						\
 	"nop.w"					\
 )
 #else
-#define SEV		ALT_SMP("sev", "nop")
-#define WFE(cond)	ALT_SMP("wfe" cond, "nop")
+#define WFE(cond)	__ALT_SMP_ASM("wfe" cond, "nop")
 #endif
 
+#define SEV		__ALT_SMP_ASM(WASM(sev), WASM(nop))
+
 static inline void dsb_sev(void)
 {
 #if __LINUX_ARM_ARCH__ >= 7
@@ -77,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	u32 newval;
 	arch_spinlock_t lockval;
 
+	prefetchw(&lock->slock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%3]\n"
 "	add	%1, %0, %4\n"
@@ -100,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	unsigned long contended, res;
 	u32 slock;
 
+	prefetchw(&lock->slock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%3]\n"
@@ -152,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
@@ -170,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -203,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 }
 
 /* write_can_lock - would write_trylock() succeed? */
-#define arch_write_can_lock(x)		((x)->lock == 0)
+#define arch_write_can_lock(x)		(ACCESS_ONCE((x)->lock) == 0)
 
 /*
  * Read locks are a bit more hairy:
@@ -221,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
@@ -241,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 	smp_mb();
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	sub	%0, %0, #1\n"
@@ -259,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -280,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 }
 
 /* read_can_lock - would read_trylock() succeed? */
-#define arch_read_can_lock(x)		((x)->lock < 0x80000000)
+#define arch_read_can_lock(x)		(ACCESS_ONCE((x)->lock) < 0x80000000)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index b262d2f8b478..47663fcb10ad 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -25,7 +25,7 @@ typedef struct {
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
-	volatile unsigned int lock;
+	u32 lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
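
These two rwlock changes go together: every locked-path access to the lock word already happens inside ldrex/strex assembly, so the field no longer needs to be volatile, and the only plain C readers — arch_write_can_lock() and arch_read_can_lock() — now go through ACCESS_ONCE() so the compiler must emit a real, single load each time they are evaluated. For reference, ACCESS_ONCE() in <linux/compiler.h> of this era is just a volatile access at the use site:

    /* From <linux/compiler.h> (kernels of this vintage), for reference: */
    #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

    /* so the rewritten helper above reads the lock word exactly once: */
    #define arch_write_can_lock(x)	(ACCESS_ONCE((x)->lock) == 0)
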
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index f5989f46b4d2..b88beaba6b4a 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -38,6 +38,8 @@
 #ifdef __ASSEMBLY__
 #define W(instr)	instr.w
 #define BSYM(sym)	sym + 1
+#else
+#define WASM(instr)	#instr ".w"
 #endif
 
 #else	/* !CONFIG_THUMB2_KERNEL */
@@ -50,6 +52,8 @@
 #ifdef __ASSEMBLY__
 #define W(instr)	instr
 #define BSYM(sym)	sym
+#else
+#define WASM(instr)	#instr
 #endif
 
 #endif	/* CONFIG_THUMB2_KERNEL */
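
WASM() is the C-side counterpart of the assembler-only W() macro: it stringizes an instruction mnemonic and, under CONFIG_THUMB2_KERNEL, appends the .w wide-encoding suffix, so inline assembly built up from C strings gets the correct encoding without per-caller #ifdefs. An illustration (the expansions are spelled out here for clarity, not part of the patch):

    /* CONFIG_THUMB2_KERNEL:  WASM(sev)  ->  "sev" ".w"  ->  "sev.w"
     * otherwise:             WASM(sev)  ->  "sev"
     *
     * which is what lets spinlock.h above define SEV portably as: */
    #define SEV	__ALT_SMP_ASM(WASM(sev), WASM(nop))
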
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index d6408d1ee543..e0c68d5bb7dc 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -10,6 +10,11 @@ UNWIND(	.fnstart	)
 	and	r3, r0, #31		@ Get bit offset
 	mov	r0, r0, lsr #5
 	add	r1, r1, r0, lsl #2	@ Get word offset
+#if __LINUX_ARM_ARCH__ >= 7
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 	mov	r3, r2, lsl r3
 1:	ldrex	r2, [r1]
 	\instr	r2, r2, r3