From 6323f0ccedf756dfe5f46549cec69a2d6d97937b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 16 Jan 2011 18:02:17 +0000 Subject: ARM: bitops: switch set/clear/change bitops to use ldrex/strex Switch the set/clear/change bitops to use the word-based exclusive operations, which are only present in a wider range of ARM architectures than the byte-based exclusive operations. Tested record: - Nicolas Pitre: ext3,rw,le - Sourav Poddar: nfs,le - Will Deacon: ext3,rw,le - Tony Lindgren: ext3+nfs,le Reviewed-by: Nicolas Pitre Tested-by: Sourav Poddar Tested-by: Will Deacon Tested-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/include/asm/bitops.h | 60 ++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 38 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 7b1bb2bbaf88..af54ed102f5f 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -148,15 +148,19 @@ ____atomic_test_and_change_bit(unsigned int bit, volatile unsigned long *p) * Note that bit 0 is defined to be 32-bit word bit 0, not byte 0 bit 0. */ +/* + * Native endian assembly bitops. nr = 0 -> word 0 bit 0. + */ +extern void _set_bit(int nr, volatile unsigned long * p); +extern void _clear_bit(int nr, volatile unsigned long * p); +extern void _change_bit(int nr, volatile unsigned long * p); +extern int _test_and_set_bit(int nr, volatile unsigned long * p); +extern int _test_and_clear_bit(int nr, volatile unsigned long * p); +extern int _test_and_change_bit(int nr, volatile unsigned long * p); + /* * Little endian assembly bitops. nr = 0 -> byte 0 bit 0. */ -extern void _set_bit_le(int nr, volatile unsigned long * p); -extern void _clear_bit_le(int nr, volatile unsigned long * p); -extern void _change_bit_le(int nr, volatile unsigned long * p); -extern int _test_and_set_bit_le(int nr, volatile unsigned long * p); -extern int _test_and_clear_bit_le(int nr, volatile unsigned long * p); -extern int _test_and_change_bit_le(int nr, volatile unsigned long * p); extern int _find_first_zero_bit_le(const void * p, unsigned size); extern int _find_next_zero_bit_le(const void * p, int size, int offset); extern int _find_first_bit_le(const unsigned long *p, unsigned size); @@ -165,12 +169,6 @@ extern int _find_next_bit_le(const unsigned long *p, int size, int offset); /* * Big endian assembly bitops. nr = 0 -> byte 3 bit 0. */ -extern void _set_bit_be(int nr, volatile unsigned long * p); -extern void _clear_bit_be(int nr, volatile unsigned long * p); -extern void _change_bit_be(int nr, volatile unsigned long * p); -extern int _test_and_set_bit_be(int nr, volatile unsigned long * p); -extern int _test_and_clear_bit_be(int nr, volatile unsigned long * p); -extern int _test_and_change_bit_be(int nr, volatile unsigned long * p); extern int _find_first_zero_bit_be(const void * p, unsigned size); extern int _find_next_zero_bit_be(const void * p, int size, int offset); extern int _find_first_bit_be(const unsigned long *p, unsigned size); @@ -180,33 +178,26 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset); /* * The __* form of bitops are non-atomic and may be reordered. */ -#define ATOMIC_BITOP_LE(name,nr,p) \ - (__builtin_constant_p(nr) ? \ - ____atomic_##name(nr, p) : \ - _##name##_le(nr,p)) - -#define ATOMIC_BITOP_BE(name,nr,p) \ - (__builtin_constant_p(nr) ? \ - ____atomic_##name(nr, p) : \ - _##name##_be(nr,p)) +#define ATOMIC_BITOP(name,nr,p) \ + (__builtin_constant_p(nr) ? ____atomic_##name(nr, p) : _##name(nr,p)) #else -#define ATOMIC_BITOP_LE(name,nr,p) _##name##_le(nr,p) -#define ATOMIC_BITOP_BE(name,nr,p) _##name##_be(nr,p) +#define ATOMIC_BITOP(name,nr,p) _##name(nr,p) #endif -#define NONATOMIC_BITOP(name,nr,p) \ - (____nonatomic_##name(nr, p)) +/* + * Native endian atomic definitions. + */ +#define set_bit(nr,p) ATOMIC_BITOP(set_bit,nr,p) +#define clear_bit(nr,p) ATOMIC_BITOP(clear_bit,nr,p) +#define change_bit(nr,p) ATOMIC_BITOP(change_bit,nr,p) +#define test_and_set_bit(nr,p) ATOMIC_BITOP(test_and_set_bit,nr,p) +#define test_and_clear_bit(nr,p) ATOMIC_BITOP(test_and_clear_bit,nr,p) +#define test_and_change_bit(nr,p) ATOMIC_BITOP(test_and_change_bit,nr,p) #ifndef __ARMEB__ /* * These are the little endian, atomic definitions. */ -#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p) -#define clear_bit(nr,p) ATOMIC_BITOP_LE(clear_bit,nr,p) -#define change_bit(nr,p) ATOMIC_BITOP_LE(change_bit,nr,p) -#define test_and_set_bit(nr,p) ATOMIC_BITOP_LE(test_and_set_bit,nr,p) -#define test_and_clear_bit(nr,p) ATOMIC_BITOP_LE(test_and_clear_bit,nr,p) -#define test_and_change_bit(nr,p) ATOMIC_BITOP_LE(test_and_change_bit,nr,p) #define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) #define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off) #define find_first_bit(p,sz) _find_first_bit_le(p,sz) @@ -215,16 +206,9 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset); #define WORD_BITOFF_TO_LE(x) ((x)) #else - /* * These are the big endian, atomic definitions. */ -#define set_bit(nr,p) ATOMIC_BITOP_BE(set_bit,nr,p) -#define clear_bit(nr,p) ATOMIC_BITOP_BE(clear_bit,nr,p) -#define change_bit(nr,p) ATOMIC_BITOP_BE(change_bit,nr,p) -#define test_and_set_bit(nr,p) ATOMIC_BITOP_BE(test_and_set_bit,nr,p) -#define test_and_clear_bit(nr,p) ATOMIC_BITOP_BE(test_and_clear_bit,nr,p) -#define test_and_change_bit(nr,p) ATOMIC_BITOP_BE(test_and_change_bit,nr,p) #define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz) #define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off) #define find_first_bit(p,sz) _find_first_bit_be(p,sz) -- cgit v1.2.2 From 000d9c78eb5cd7f18e3d6a381d66e606bc9b8196 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Jan 2011 16:22:12 +0000 Subject: ARM: v6k: remove CPU_32v6K dependencies in asm/spinlock.h SMP requires at least the ARMv6K extensions to be present, so if we're running on SMP, the WFE and SEV instructions must be available. However, when we run on UP, the v6K extensions may not be available, and so we don't want WFE/SEV to be in the instruction stream. Use the SMP alternatives infrastructure to replace these instructions with NOPs if we build for SMP but run on UP. Tested-by: Tony Lindgren Tested-by: Sourav Poddar Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 17eb355707dd..da1af5240159 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -5,17 +5,36 @@ #error SMP not supported on pre-ARMv6 CPUs #endif +/* + * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K + * extensions, so when running on UP, we have to patch these instructions away. + */ +#define ALT_SMP(smp, up) \ + "9998: " smp "\n" \ + " .pushsection \".alt.smp.init\", \"a\"\n" \ + " .long 9998b\n" \ + " " up "\n" \ + " .popsection\n" + +#ifdef CONFIG_THUMB2_KERNEL +#define SEV ALT_SMP("sev.w", "nop.w") +#define WFE(cond) ALT_SMP("wfe" cond ".w", "nop.w") +#else +#define SEV ALT_SMP("sev", "nop") +#define WFE(cond) ALT_SMP("wfe" cond, "nop") +#endif + static inline void dsb_sev(void) { #if __LINUX_ARM_ARCH__ >= 7 __asm__ __volatile__ ( "dsb\n" - "sev" + SEV ); -#elif defined(CONFIG_CPU_32v6K) +#else __asm__ __volatile__ ( "mcr p15, 0, %0, c7, c10, 4\n" - "sev" + SEV : : "r" (0) ); #endif @@ -46,9 +65,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: ldrex %0, [%1]\n" " teq %0, #0\n" -#ifdef CONFIG_CPU_32v6K -" wfene\n" -#endif + WFE("ne") " strexeq %0, %2, [%1]\n" " teqeq %0, #0\n" " bne 1b" @@ -107,9 +124,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) __asm__ __volatile__( "1: ldrex %0, [%1]\n" " teq %0, #0\n" -#ifdef CONFIG_CPU_32v6K -" wfene\n" -#endif + WFE("ne") " strexeq %0, %2, [%1]\n" " teq %0, #0\n" " bne 1b" @@ -176,9 +191,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) "1: ldrex %0, [%2]\n" " adds %0, %0, #1\n" " strexpl %1, %0, [%2]\n" -#ifdef CONFIG_CPU_32v6K -" wfemi\n" -#endif + WFE("mi") " rsbpls %0, %1, #0\n" " bmi 1b" : "=&r" (tmp), "=&r" (tmp2) -- cgit v1.2.2 From e399b1a4e1d205bdc816cb550d2064f2eb1ddc4c Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 17 Jan 2011 15:08:32 +0000 Subject: ARM: v6k: introduce CPU_V6K option Introduce a CPU_V6K configuration option for platforms to select if they have a V6K CPU core. This allows us to identify whether we need to support ARMv6 CPUs without the V6K SMP extensions at build time. Currently CPU_V6K is just an alias for CPU_V6, and all places which reference CPU_V6 are replaced by (CPU_V6 || CPU_V6K). Select CPU_V6K from platforms which are known to be V6K-only. Acked-by: Tony Lindgren Tested-by: Sourav Poddar Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 5 +++-- arch/arm/include/asm/proc-fns.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 3acd8fa25e34..7d0614f599a7 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -116,7 +116,7 @@ # define MULTI_CACHE 1 #endif -#if defined(CONFIG_CPU_V6) +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) //# ifdef _CACHE # define MULTI_CACHE 1 //# else @@ -316,7 +316,8 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, * Optimized __flush_icache_all for the common cases. Note that UP ARMv7 * will fall through to use __flush_icache_all_generic. */ -#if (defined(CONFIG_CPU_V7) && defined(CONFIG_CPU_V6)) || \ +#if (defined(CONFIG_CPU_V7) && \ + (defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K))) || \ defined(CONFIG_SMP_ON_UP) #define __flush_icache_preferred __cpuc_flush_icache_all #elif __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 8fdae9bc9abb..296ca47489f9 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -231,7 +231,7 @@ # endif #endif -#ifdef CONFIG_CPU_V6 +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) # ifdef CPU_NAME # undef MULTI_CPU # define MULTI_CPU -- cgit v1.2.2 From 4ed67a53591db641543d57f31c182591a429dc93 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 17 Jan 2011 15:42:42 +0000 Subject: ARM: v6k: select cmpxchg code sequences according to V6 variants If CONFIG_CPU_V6 is enabled, we must avoid the byte/halfword/doubleword exclusive operations, which aren't implemented before V6K. Use the generic versions (or omit them) instead. If CONFIG_CPU_V6 is not set, but CONFIG_CPU_32v6K is enabled, we have the K extnesions, so use these new instructions. Acked-by: Tony Lindgren Tested-by: Sourav Poddar Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/system.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 97f6d60297d5..9a87823642d0 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -347,6 +347,7 @@ void cpu_idle_wait(void); #include #if __LINUX_ARM_ARCH__ < 6 +/* min ARCH < ARMv6 */ #ifdef CONFIG_SMP #error "SMP is not supported on this platform" @@ -365,7 +366,7 @@ void cpu_idle_wait(void); #include #endif -#else /* __LINUX_ARM_ARCH__ >= 6 */ +#else /* min ARCH >= ARMv6 */ extern void __bad_cmpxchg(volatile void *ptr, int size); @@ -379,7 +380,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long oldval, res; switch (size) { -#ifdef CONFIG_CPU_32v6K +#ifndef CONFIG_CPU_V6 /* min ARCH >= ARMv6K */ case 1: do { asm volatile("@ __cmpxchg1\n" @@ -404,7 +405,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, : "memory", "cc"); } while (res); break; -#endif /* CONFIG_CPU_32v6K */ +#endif case 4: do { asm volatile("@ __cmpxchg4\n" @@ -450,12 +451,12 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, unsigned long ret; switch (size) { -#ifndef CONFIG_CPU_32v6K +#ifdef CONFIG_CPU_V6 /* min ARCH == ARMv6 */ case 1: case 2: ret = __cmpxchg_local_generic(ptr, old, new, size); break; -#endif /* !CONFIG_CPU_32v6K */ +#endif default: ret = __cmpxchg(ptr, old, new, size); } @@ -469,7 +470,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, (unsigned long)(n), \ sizeof(*(ptr)))) -#ifdef CONFIG_CPU_32v6K +#ifndef CONFIG_CPU_V6 /* min ARCH >= ARMv6K */ /* * Note : ARMv7-M (currently unsupported by Linux) does not support @@ -524,11 +525,11 @@ static inline unsigned long long __cmpxchg64_mb(volatile void *ptr, (unsigned long long)(o), \ (unsigned long long)(n))) -#else /* !CONFIG_CPU_32v6K */ +#else /* min ARCH = ARMv6 */ #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) -#endif /* CONFIG_CPU_32v6K */ +#endif #endif /* __LINUX_ARM_ARCH__ >= 6 */ -- cgit v1.2.2 From 37bc618fe2689a7f8de8fac82e72b00ecea4d43d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 17 Jan 2011 16:38:56 +0000 Subject: ARM: v6k: select TLS register code according to V6 variants If CONFIG_CPU_V6 is enabled, we may or may not have the TLS register. Use the conditional code which copes with this variability. Otherwise, if CONFIG_CPU_32v6K is set, we know we have the TLS register on all supported CPUs, so use it unconditionally. Acked-by: Nicolas Pitre Acked-by: Tony Lindgren Tested-by: Sourav Poddar Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/tls.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h index e71d6ff8d104..60843eb0f61c 100644 --- a/arch/arm/include/asm/tls.h +++ b/arch/arm/include/asm/tls.h @@ -28,15 +28,14 @@ #define tls_emu 1 #define has_tls_reg 1 #define set_tls set_tls_none -#elif __LINUX_ARM_ARCH__ >= 7 || \ - (__LINUX_ARM_ARCH__ == 6 && defined(CONFIG_CPU_32v6K)) -#define tls_emu 0 -#define has_tls_reg 1 -#define set_tls set_tls_v6k -#elif __LINUX_ARM_ARCH__ == 6 +#elif defined(CONFIG_CPU_V6) #define tls_emu 0 #define has_tls_reg (elf_hwcap & HWCAP_TLS) #define set_tls set_tls_v6 +#elif defined(CONFIG_CPU_32v6K) +#define tls_emu 0 +#define has_tls_reg 1 +#define set_tls set_tls_v6k #else #define tls_emu 0 #define has_tls_reg 0 -- cgit v1.2.2 From 774c096bf9e49eebf7b5d2d9fdddf632c29ccea0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 23 Jan 2011 13:04:53 +0000 Subject: ARM: v6/v7 cache: allow cache calls to be optimized The v6 cache call optimization was disabled to allow the optional block cache operations to be subsituted on CPUs which supported those operations. However, as that functionality was removed, we no longer need to prevent this optimization being taken advantage of. The v7 cache call optimization was just a copy of the v6, so also fix that too. Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 7d0614f599a7..d9b4c42d62ff 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -116,20 +116,20 @@ # define MULTI_CACHE 1 #endif -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) -//# ifdef _CACHE +#if defined(CONFIG_CPU_CACHE_V6) +# ifdef _CACHE # define MULTI_CACHE 1 -//# else -//# define _CACHE v6 -//# endif +# else +# define _CACHE v6 +# endif #endif -#if defined(CONFIG_CPU_V7) -//# ifdef _CACHE +#if defined(CONFIG_CPU_CACHE_V7) +# ifdef _CACHE # define MULTI_CACHE 1 -//# else -//# define _CACHE v7 -//# endif +# else +# define _CACHE v7 +# endif #endif #if !defined(_CACHE) && !defined(MULTI_CACHE) -- cgit v1.2.2 From 917692f5f7ec63de3b093c825913d68e910db282 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 9 Feb 2011 12:06:59 +0100 Subject: ARM: 6655/1: Correct WFE() in asm/spinlock.h for Thumb-2 The content for ALT_SMP() in the definition of WFE() expands to 6 bytes (IT cc ; WFEcc.W), which breaks the assumptions of the fixup code, leading to lockups when the affected code gets run. This patch works around the problem by explicitly using an IT + WFEcc.N pair. Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index da1af5240159..fdd3820edff8 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -18,7 +18,23 @@ #ifdef CONFIG_THUMB2_KERNEL #define SEV ALT_SMP("sev.w", "nop.w") -#define WFE(cond) ALT_SMP("wfe" cond ".w", "nop.w") +/* + * For Thumb-2, special care is needed to ensure that the conditional WFE + * instruction really does assemble to exactly 4 bytes (as required by + * the SMP_ON_UP fixup code). By itself "wfene" might cause the + * assembler to insert a extra (16-bit) IT instruction, depending on the + * presence or absence of neighbouring conditional instructions. + * + * To avoid this unpredictableness, an approprite IT is inserted explicitly: + * the assembler won't change IT instructions which are explicitly present + * in the input. + */ +#define WFE(cond) ALT_SMP( \ + "it " cond "\n\t" \ + "wfe" cond ".n", \ + \ + "nop.w" \ +) #else #define SEV ALT_SMP("sev", "nop") #define WFE(cond) ALT_SMP("wfe" cond, "nop") -- cgit v1.2.2