aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile/include
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2013-09-26 13:24:53 -0400
committerChris Metcalf <cmetcalf@tilera.com>2013-09-30 10:34:41 -0400
commitf862eefec0b68e099a9fa58d3761ffb10bad97e1 (patch)
treee82f50d999230deac554c91de5665a6475836243 /arch/tile/include
parent3f725c5b924e14eb00c58892d21d92100121e5ce (diff)
tile: use a more conservative __my_cpu_offset in CONFIG_PREEMPT
It turns out the kernel relies on barrier() to force a reload of the percpu offset value. Since we can't easily modify the definition of barrier() to include "tp" as an output register, we instead provide a definition of __my_cpu_offset as extended assembly that includes a fake stack read to hazard against barrier(), forcing gcc to know that it must reread "tp" and recompute anything based on "tp" after a barrier. This fixes observed hangs in the slub allocator when we are looping on a percpu cmpxchg_double. A similar fix for ARMv7 was made in June in change 509eb76ebf97. Cc: stable@vger.kernel.org Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/include')
-rw-r--r--arch/tile/include/asm/percpu.h34
1 files changed, 31 insertions, 3 deletions
diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h
index 63294f5a8efb..4f7ae39fa202 100644
--- a/arch/tile/include/asm/percpu.h
+++ b/arch/tile/include/asm/percpu.h
@@ -15,9 +15,37 @@
15#ifndef _ASM_TILE_PERCPU_H 15#ifndef _ASM_TILE_PERCPU_H
16#define _ASM_TILE_PERCPU_H 16#define _ASM_TILE_PERCPU_H
17 17
18register unsigned long __my_cpu_offset __asm__("tp"); 18register unsigned long my_cpu_offset_reg asm("tp");
19#define __my_cpu_offset __my_cpu_offset 19
20#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp)) 20#ifdef CONFIG_PREEMPT
21/*
22 * For full preemption, we can't just use the register variable
23 * directly, since we need barrier() to hazard against it, causing the
24 * compiler to reload anything computed from a previous "tp" value.
25 * But we also don't want to use volatile asm, since we'd like the
26 * compiler to be able to cache the value across multiple percpu reads.
27 * So we use a fake stack read as a hazard against barrier().
28 * The 'U' constraint is like 'm' but disallows postincrement.
29 */
30static inline unsigned long __my_cpu_offset(void)
31{
32 unsigned long tp;
33 register unsigned long *sp asm("sp");
34 asm("move %0, tp" : "=r" (tp) : "U" (*sp));
35 return tp;
36}
37#define __my_cpu_offset __my_cpu_offset()
38#else
39/*
40 * We don't need to hazard against barrier() since "tp" doesn't ever
41 * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
42 * changes at function call points, at which we are already re-reading
43 * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
44 */
45#define __my_cpu_offset my_cpu_offset_reg
46#endif
47
48#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
21 49
22#include <asm-generic/percpu.h> 50#include <asm-generic/percpu.h>
23 51