aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/include
diff options
context:
space:
mode:
authorHeiko Carstens <heiko.carstens@de.ibm.com>2013-10-21 01:57:41 -0400
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2013-10-24 11:17:13 -0400
commitf84cd97e5c61fe3ce66af838dd955dee02076639 (patch)
treea3477ba33ab836d729b395741f80618ccfdab166 /arch/s390/include
parent0702fbf572ac5e513873628bf534da4a8a2025b4 (diff)
s390/percpu: make use of interlocked-access facility 1 instructions
Optimize this_cpu_* functions for 64 bit by making use of new instructions that came with the interlocked-access facility 1 (load-and-*) and the general-instructions-extension facility (asi, agsi). That way we get rid of the compare-and-swap loop in most cases. Code size reduction (defconfig, -march=z196): 11,555 bytes. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/include')
-rw-r--r--arch/s390/include/asm/percpu.h125
1 file changed, 100 insertions, 25 deletions
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 41baca870d0c..061ab45faf70 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -21,7 +21,11 @@
21#define ARCH_NEEDS_WEAK_PER_CPU 21#define ARCH_NEEDS_WEAK_PER_CPU
22#endif 22#endif
23 23
24#define arch_this_cpu_to_op(pcp, val, op) \ 24/*
25 * We use a compare-and-swap loop since that uses less cpu cycles than
26 * disabling and enabling interrupts like the generic variant would do.
27 */
28#define arch_this_cpu_to_op_simple(pcp, val, op) \
25({ \ 29({ \
26 typedef typeof(pcp) pcp_op_T__; \ 30 typedef typeof(pcp) pcp_op_T__; \
27 pcp_op_T__ old__, new__, prev__; \ 31 pcp_op_T__ old__, new__, prev__; \
@@ -38,30 +42,101 @@
38 new__; \ 42 new__; \
39}) 43})
40 44
41#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +) 45#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
42#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +) 46#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
43#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +) 47#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
44#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +) 48#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
45 49#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
46#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op(pcp, val, +) 50#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
47#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op(pcp, val, +) 51#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
48#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op(pcp, val, +) 52#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
49#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op(pcp, val, +) 53#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
50 54#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
51#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &) 55
52#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &) 56#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
53#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &) 57
54#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &) 58#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
55 59#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
56#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |) 60#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
57#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |) 61#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
58#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |) 62#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
59#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |) 63#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
60 64#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
61#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^) 65#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
62#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^) 66#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
63#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^) 67#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
64#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^) 68
69#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
70
71#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
72{ \
73 typedef typeof(pcp) pcp_op_T__; \
74 pcp_op_T__ val__ = (val); \
75 pcp_op_T__ old__, *ptr__; \
76 preempt_disable(); \
77 ptr__ = __this_cpu_ptr(&(pcp)); \
78 if (__builtin_constant_p(val__) && \
79 ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
80 asm volatile( \
81 op2 " %[ptr__],%[val__]\n" \
82 : [ptr__] "+Q" (*ptr__) \
83 : [val__] "i" ((szcast)val__) \
84 : "cc"); \
85 } else { \
86 asm volatile( \
87 op1 " %[old__],%[val__],%[ptr__]\n" \
88 : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
89 : [val__] "d" (val__) \
90 : "cc"); \
91 } \
92 preempt_enable(); \
93}
94
95#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
96#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
97
98#define arch_this_cpu_add_return(pcp, val, op) \
99({ \
100 typedef typeof(pcp) pcp_op_T__; \
101 pcp_op_T__ val__ = (val); \
102 pcp_op_T__ old__, *ptr__; \
103 preempt_disable(); \
104 ptr__ = __this_cpu_ptr(&(pcp)); \
105 asm volatile( \
106 op " %[old__],%[val__],%[ptr__]\n" \
107 : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
108 : [val__] "d" (val__) \
109 : "cc"); \
110 preempt_enable(); \
111 old__ + val__; \
112})
113
114#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
115#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
116
117#define arch_this_cpu_to_op(pcp, val, op) \
118{ \
119 typedef typeof(pcp) pcp_op_T__; \
120 pcp_op_T__ val__ = (val); \
121 pcp_op_T__ old__, *ptr__; \
122 preempt_disable(); \
123 ptr__ = __this_cpu_ptr(&(pcp)); \
124 asm volatile( \
125 op " %[old__],%[val__],%[ptr__]\n" \
126 : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
127 : [val__] "d" (val__) \
128 : "cc"); \
129 preempt_enable(); \
130}
131
132#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan")
133#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, "lang")
134#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao")
135#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog")
136#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lax")
137#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laxg")
138
139#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
65 140
66#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ 141#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
67({ \ 142({ \