author		Vineet Gupta <vgupta@synopsys.com>	2015-07-27 07:53:28 -0400
committer	Vineet Gupta <vgupta@synopsys.com>	2016-09-30 17:48:17 -0400
commit		ce6365270ecd1216b48fb1440978e454ae0144de (patch)
tree		2e46017d408be15aa5834675cd11c06122fc353c
parent		26c01c49d559268527d78f45a6818fae0c204a45 (diff)
ARCv2: Implement atomic64 based on LLOCKD/SCONDD instructions
ARCv2 ISA provides 64-bit exclusive load/stores, so use them to implement
the 64-bit atomics and elide the spinlock-based generic 64-bit atomics.

Boot tested with the atomic64 self-tests (and God bless the person who wrote
them; I realized my inline assembly is sloppy as hell).
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
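
A minimal usage sketch (not part of this patch) of the atomic64_t API it
implements natively: a 64-bit statistics counter bumped from several CPUs.
The names rx_bytes/account_rx are invented for illustration; with
ARC_HAS_LL64 + ARC_HAS_LLSC each call now compiles down to an LLOCKD/SCONDD
retry loop instead of going through the spinlock-protected helpers in
lib/atomic64.c.

/* Hypothetical example, not from this commit */
#include <linux/atomic.h>
#include <linux/types.h>

static atomic64_t rx_bytes = ATOMIC64_INIT(0);

static void account_rx(u32 len)
{
	/* maps to an LLOCKD/SCONDD loop (add.f + adc) on ARCv2 */
	atomic64_add(len, &rx_bytes);
}

static u64 rx_bytes_snapshot(void)
{
	/* a plain 64-bit LDD observes both words together, no LL/SC needed */
	return atomic64_read(&rx_bytes);
}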
-rw-r--r--	arch/arc/Kconfig		  2
-rw-r--r--	arch/arc/include/asm/atomic.h	261
2 files changed, 260 insertions, 3 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 0d3e59f56974..073b3582544b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -13,7 +13,7 @@ config ARC
 	select CLKSRC_OF
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select GENERIC_ATOMIC64
+	select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_FIND_FIRST_BIT
 	# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4e3c1b6b0806..d0e222e3776b 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -20,6 +20,7 @@
 #ifndef CONFIG_ARC_PLAT_EZNPS
 
 #define atomic_read(v)	READ_ONCE((v)->counter)
+#define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
@@ -343,10 +344,266 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 
 #define atomic_add_negative(i, v)	(atomic_add_return(i, v) < 0)
 
-#define ATOMIC_INIT(i)			{ (i) }
+
+#ifdef CONFIG_GENERIC_ATOMIC64
 
 #include <asm-generic/atomic64.h>
 
-#endif
+#else	/* Kconfig ensures this is only enabled with needed h/w assist */
+
+/*
+ * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
+ *  - The address HAS to be 64-bit aligned
+ *  - There are 2 semantics involved here:
+ *    = exclusive implies no interim update between load/store to same addr
+ *    = both words are observed/updated together: this is guaranteed even
+ *      for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
+ *      is NOT required to use LLOCKD+SCONDD, STD suffices
+ */
+
+typedef struct {
+	aligned_u64 counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(a) { (a) }
+
+static inline long long atomic64_read(const atomic64_t *v)
+{
+	unsigned long long val;
+
+	__asm__ __volatile__(
+	"	ldd   %0, [%1]	\n"
+	: "=r"(val)
+	: "r"(&v->counter));
+
+	return val;
+}
+
+static inline void atomic64_set(atomic64_t *v, long long a)
+{
+	/*
+	 * This could have been a simple assignment in "C" but would need
+	 * explicit volatile. Otherwise gcc optimizers could elide the store,
+	 * which borked atomic64 self-test
+	 * In the inline asm version, memory clobber needed for exact same
+	 * reason, to tell gcc about the store.
+	 *
+	 * This however is not needed for sibling atomic64_add() etc since both
+	 * load/store are explicitly done in inline asm. As long as API is used
+	 * for each access, gcc has no way to optimize away any load/store
+	 */
+	__asm__ __volatile__(
+	"	std   %0, [%1]	\n"
+	:
+	: "r"(a), "r"(&v->counter)
+	: "memory");
+}
+
+#define ATOMIC64_OP(op, op1, op2)					\
+static inline void atomic64_##op(long long a, atomic64_t *v)		\
+{									\
+	unsigned long long val;						\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd  %0, [%1]	\n"				\
+	"	" #op1 " %L0, %L0, %L2	\n"				\
+	"	" #op2 " %H0, %H0, %H2	\n"				\
+	"	scondd  %0, [%1]	\n"				\
+	"	bnz     1b		\n"				\
+	: "=&r"(val)							\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");							\
+}									\
+
+#define ATOMIC64_OP_RETURN(op, op1, op2)				\
+static inline long long atomic64_##op##_return(long long a, atomic64_t *v)	\
+{									\
+	unsigned long long val;						\
+									\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd  %0, [%1]	\n"				\
+	"	" #op1 " %L0, %L0, %L2	\n"				\
+	"	" #op2 " %H0, %H0, %H2	\n"				\
+	"	scondd  %0, [%1]	\n"				\
+	"	bnz     1b		\n"				\
+	: [val] "=&r"(val)						\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");	/* memory clobber comes from smp_mb() */	\
+									\
+	smp_mb();							\
+									\
+	return val;							\
+}
+
+#define ATOMIC64_FETCH_OP(op, op1, op2)					\
+static inline long long atomic64_fetch_##op(long long a, atomic64_t *v)	\
+{									\
+	unsigned long long val, orig;					\
+									\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd  %0, [%2]	\n"				\
+	"	" #op1 " %L1, %L0, %L3	\n"				\
+	"	" #op2 " %H1, %H0, %H3	\n"				\
+	"	scondd  %1, [%2]	\n"				\
+	"	bnz     1b		\n"				\
+	: "=&r"(orig), "=&r"(val)					\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");	/* memory clobber comes from smp_mb() */	\
+									\
+	smp_mb();							\
+									\
+	return orig;							\
+}
+
+#define ATOMIC64_OPS(op, op1, op2)					\
+	ATOMIC64_OP(op, op1, op2)					\
+	ATOMIC64_OP_RETURN(op, op1, op2)				\
+	ATOMIC64_FETCH_OP(op, op1, op2)
+
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC64_OPS(add, add.f, adc)
+ATOMIC64_OPS(sub, sub.f, sbc)
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, or, or)
+ATOMIC64_OPS(xor, xor, xor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline long long
+atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
+{
+	long long prev;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	brne    %L0, %L2, 2f	\n"
+	"	brne    %H0, %H2, 2f	\n"
+	"	scondd  %3, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(prev)
+	: "r"(ptr), "ir"(expected), "r"(new)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return prev;
+}
+
+static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+{
+	long long prev;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	scondd  %2, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(prev)
+	: "r"(ptr), "r"(new)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return prev;
+}
+
+/**
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long long val;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	sub.f   %L0, %L0, 1	# w0 - 1, set C on borrow\n"
+	"	sub.c   %H0, %H0, 1	# if C set, w1 - 1\n"
+	"	brlt    %H0, 0, 2f	\n"
+	"	scondd  %0, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(val)
+	: "r"(&v->counter)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return val;
+}
+
+/**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * if (v != u) { v += a; ret = 1} else {ret = 0}
+ * Returns 1 iff @v was not @u (i.e. if add actually happened)
+ */
+static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	long long val;
+	int op_done;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%2]	\n"
+	"	mov	%1, 1		\n"
+	"	brne	%L0, %L4, 2f	# continue to add since v != u \n"
+	"	breq.d	%H0, %H4, 3f	# return since v == u \n"
+	"	mov	%1, 0		\n"
+	"2:				\n"
+	"	add.f   %L0, %L0, %L3	\n"
+	"	adc     %H0, %H0, %H3	\n"
+	"	scondd  %0, [%2]	\n"
+	"	bnz     1b		\n"
+	"3:				\n"
+	: "=&r"(val), "=&r" (op_done)
+	: "r"(&v->counter), "r"(a), "r"(u)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return op_done;
+}
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
+
+#endif	/* !CONFIG_GENERIC_ATOMIC64 */
+
+#endif	/* !__ASSEMBLY__ */
 
 #endif
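
For readers of the kernel-doc above, a hypothetical illustration (not from
this commit) of the atomic64_add_unless()/atomic64_inc_not_zero() semantics,
i.e. "take a reference only while the object is still live". The struct and
helper names are invented; on ARCv2 the whole compare-and-add runs inside a
single LLOCKD/SCONDD sequence instead of under the generic atomic64 spinlock.

/* Hypothetical usage sketch of the API added by this patch */
#include <linux/atomic.h>
#include <linux/types.h>

struct obj {
	atomic64_t refs;	/* 0 means the object is being torn down */
};

static bool obj_tryget(struct obj *o)
{
	/* returns 1 only if refs was non-zero, i.e. the add actually happened */
	return atomic64_inc_not_zero(&o->refs);
}

static void obj_put(struct obj *o)
{
	if (atomic64_dec_and_test(&o->refs)) {
		/* last reference gone: free the object here */
	}
}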