author     Chris Metcalf <cmetcalf@tilera.com>    2013-09-26 13:24:53 -0400
committer  Chris Metcalf <cmetcalf@tilera.com>    2013-09-30 10:34:41 -0400
commit     f862eefec0b68e099a9fa58d3761ffb10bad97e1
tree       e82f50d999230deac554c91de5665a6475836243 /arch/tile/include
parent     3f725c5b924e14eb00c58892d21d92100121e5ce

tile: use a more conservative __my_cpu_offset in CONFIG_PREEMPT

It turns out the kernel relies on barrier() to force a reload of the
percpu offset value.  Since we can't easily modify the definition of
barrier() to include "tp" as an output register, we instead provide a
definition of __my_cpu_offset as extended assembly that includes a fake
stack read to hazard against barrier(), forcing gcc to know that it
must reread "tp" and recompute anything based on "tp" after a barrier.

This fixes observed hangs in the slub allocator when we are looping
on a percpu cmpxchg_double.

A similar fix for ARMv7 was made in June in change 509eb76ebf97.
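
To make the failure mode concrete, here is a minimal sketch of the shape of
the problem.  The register-variable line is the old definition from percpu.h
and barrier() matches the generic compiler barrier; the names
some_percpu_counter, try_update() and retry_loop_sketch() are invented for
illustration, and the snippet only builds with a tile toolchain because of
the "tp" register variable:

```c
/* Old definition from percpu.h: the per-cpu offset lives in register "tp". */
register unsigned long __my_cpu_offset __asm__("tp");

/* The generic kernel compiler barrier: clobbers "memory", not registers. */
#define barrier() __asm__ __volatile__("" : : : "memory")

/* Invented stand-ins for a DEFINE_PER_CPU variable and an inlined
 * cmpxchg_double-style update; only the shape matters here. */
extern unsigned long some_percpu_counter;

static inline int try_update(unsigned long *slot)
{
	int ok = 0;
	/* Volatile asm with a "memory" clobber, like a real cmpxchg,
	 * but nothing here reads or writes "tp". */
	__asm__ __volatile__("" : "+r" (ok) : "r" (slot) : "memory");
	return ok;
}

static void retry_loop_sketch(void)
{
	for (;;) {
		/* What this_cpu_ptr() boils down to: a constant symbol
		 * address plus the "tp" offset. */
		unsigned long *slot = (unsigned long *)
			((unsigned long)&some_percpu_counter + __my_cpu_offset);

		if (try_update(slot))
			break;

		/*
		 * Preemption and migration to another CPU can happen here.
		 * The physical "tp" register then holds the new CPU's
		 * offset, but barrier() only clobbers memory, so gcc's
		 * model says "tp" is unchanged and it may hoist "slot" out
		 * of the loop: the task keeps retrying against the old
		 * CPU's data and never makes progress.
		 */
		barrier();
	}
}
```
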
Cc: stable@vger.kernel.org
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/include')
-rw-r--r--  arch/tile/include/asm/percpu.h | 34
1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h
index 63294f5a8efb..4f7ae39fa202 100644
--- a/arch/tile/include/asm/percpu.h
+++ b/arch/tile/include/asm/percpu.h
@@ -15,9 +15,37 @@
 #ifndef _ASM_TILE_PERCPU_H
 #define _ASM_TILE_PERCPU_H
 
-register unsigned long __my_cpu_offset __asm__("tp");
-#define __my_cpu_offset __my_cpu_offset
-#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+register unsigned long my_cpu_offset_reg asm("tp");
+
+#ifdef CONFIG_PREEMPT
+/*
+ * For full preemption, we can't just use the register variable
+ * directly, since we need barrier() to hazard against it, causing the
+ * compiler to reload anything computed from a previous "tp" value.
+ * But we also don't want to use volatile asm, since we'd like the
+ * compiler to be able to cache the value across multiple percpu reads.
+ * So we use a fake stack read as a hazard against barrier().
+ * The 'U' constraint is like 'm' but disallows postincrement.
+ */
+static inline unsigned long __my_cpu_offset(void)
+{
+	unsigned long tp;
+	register unsigned long *sp asm("sp");
+	asm("move %0, tp" : "=r" (tp) : "U" (*sp));
+	return tp;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+/*
+ * We don't need to hazard against barrier() since "tp" doesn't ever
+ * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
+ * changes at function call points, at which we are already re-reading
+ * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
+ */
+#define __my_cpu_offset my_cpu_offset_reg
+#endif
+
+#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
 
 #include <asm-generic/percpu.h>
 
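
For context on how this definition is consumed: the generic layer pulled in
by <asm-generic/percpu.h> builds per-cpu pointers by adding __my_cpu_offset
to the address of a per-cpu symbol, roughly along these lines (the macro
name below is invented; the real helpers are SHIFT_PERCPU_PTR() and
this_cpu_ptr()):

```c
/* Illustrative only -- not the actual asm-generic/percpu.h text. */
#define sketch_this_cpu_ptr(ptr) \
	((typeof(ptr))((unsigned long)(ptr) + __my_cpu_offset))
```

With the CONFIG_PREEMPT definition above, each expansion of __my_cpu_offset
is a separate non-volatile asm whose fake stack read makes it depend on
memory, so gcc may still merge adjacent per-cpu reads but must redo the asm
(and anything derived from it) after every barrier().  Without
CONFIG_PREEMPT it is simply the "tp" register variable, which is cheaper and
only needs to be re-read at function-call points.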