diff options
author | Christoph Lameter <cl@linux-foundation.org> | 2010-01-05 01:34:50 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2010-01-05 01:34:50 -0500 |
commit | 5917dae83cb02dfe74c9167b79e86e6d65183fa3 (patch) | |
tree | ffd3a8c0b0c27868600f2a426ad14c0885bf4d7d /arch | |
parent | 79615760f380ec86cd58204744e774c33fab9211 (diff) |
percpu, x86: Generic inc / dec percpu instructions
Optimize code generated for percpu access by checking for increment and
decrements.
tj: fix incorrect usage of __builtin_constant_p() and restructure
percpu_add_op() macro.
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/percpu.h | 86 |
1 files changed, 72 insertions, 14 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 4c170ccc72ed..66a272dfd8b8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -103,6 +103,64 @@ do { \ | |||
103 | } \ | 103 | } \ |
104 | } while (0) | 104 | } while (0) |
105 | 105 | ||
106 | /* | ||
107 | * Generate a percpu add to memory instruction and optimize code | ||
108 | * if a one is added or subtracted. | ||
109 | */ | ||
110 | #define percpu_add_op(var, val) \ | ||
111 | do { \ | ||
112 | typedef typeof(var) pao_T__; \ | ||
113 | const int pao_ID__ = (__builtin_constant_p(val) && \ | ||
114 | ((val) == 1 || (val) == -1)) ? (val) : 0; \ | ||
115 | if (0) { \ | ||
116 | pao_T__ pao_tmp__; \ | ||
117 | pao_tmp__ = (val); \ | ||
118 | } \ | ||
119 | switch (sizeof(var)) { \ | ||
120 | case 1: \ | ||
121 | if (pao_ID__ == 1) \ | ||
122 | asm("incb "__percpu_arg(0) : "+m" (var)); \ | ||
123 | else if (pao_ID__ == -1) \ | ||
124 | asm("decb "__percpu_arg(0) : "+m" (var)); \ | ||
125 | else \ | ||
126 | asm("addb %1, "__percpu_arg(0) \ | ||
127 | : "+m" (var) \ | ||
128 | : "qi" ((pao_T__)(val))); \ | ||
129 | break; \ | ||
130 | case 2: \ | ||
131 | if (pao_ID__ == 1) \ | ||
132 | asm("incw "__percpu_arg(0) : "+m" (var)); \ | ||
133 | else if (pao_ID__ == -1) \ | ||
134 | asm("decw "__percpu_arg(0) : "+m" (var)); \ | ||
135 | else \ | ||
136 | asm("addw %1, "__percpu_arg(0) \ | ||
137 | : "+m" (var) \ | ||
138 | : "ri" ((pao_T__)(val))); \ | ||
139 | break; \ | ||
140 | case 4: \ | ||
141 | if (pao_ID__ == 1) \ | ||
142 | asm("incl "__percpu_arg(0) : "+m" (var)); \ | ||
143 | else if (pao_ID__ == -1) \ | ||
144 | asm("decl "__percpu_arg(0) : "+m" (var)); \ | ||
145 | else \ | ||
146 | asm("addl %1, "__percpu_arg(0) \ | ||
147 | : "+m" (var) \ | ||
148 | : "ri" ((pao_T__)(val))); \ | ||
149 | break; \ | ||
150 | case 8: \ | ||
151 | if (pao_ID__ == 1) \ | ||
152 | asm("incq "__percpu_arg(0) : "+m" (var)); \ | ||
153 | else if (pao_ID__ == -1) \ | ||
154 | asm("decq "__percpu_arg(0) : "+m" (var)); \ | ||
155 | else \ | ||
156 | asm("addq %1, "__percpu_arg(0) \ | ||
157 | : "+m" (var) \ | ||
158 | : "re" ((pao_T__)(val))); \ | ||
159 | break; \ | ||
160 | default: __bad_percpu_size(); \ | ||
161 | } \ | ||
162 | } while (0) | ||
163 | |||
106 | #define percpu_from_op(op, var, constraint) \ | 164 | #define percpu_from_op(op, var, constraint) \ |
107 | ({ \ | 165 | ({ \ |
108 | typeof(var) pfo_ret__; \ | 166 | typeof(var) pfo_ret__; \ |
@@ -144,8 +202,8 @@ do { \ | |||
144 | #define percpu_read(var) percpu_from_op("mov", var, "m" (var)) | 202 | #define percpu_read(var) percpu_from_op("mov", var, "m" (var)) |
145 | #define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) | 203 | #define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) |
146 | #define percpu_write(var, val) percpu_to_op("mov", var, val) | 204 | #define percpu_write(var, val) percpu_to_op("mov", var, val) |
147 | #define percpu_add(var, val) percpu_to_op("add", var, val) | 205 | #define percpu_add(var, val) percpu_add_op(var, val) |
148 | #define percpu_sub(var, val) percpu_to_op("sub", var, val) | 206 | #define percpu_sub(var, val) percpu_add_op(var, -(val)) |
149 | #define percpu_and(var, val) percpu_to_op("and", var, val) | 207 | #define percpu_and(var, val) percpu_to_op("and", var, val) |
150 | #define percpu_or(var, val) percpu_to_op("or", var, val) | 208 | #define percpu_or(var, val) percpu_to_op("or", var, val) |
151 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) | 209 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) |
@@ -157,9 +215,9 @@ do { \ | |||
157 | #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) | 215 | #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) |
158 | #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) | 216 | #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) |
159 | #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) | 217 | #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) |
160 | #define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 218 | #define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
161 | #define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 219 | #define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
162 | #define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 220 | #define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
163 | #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 221 | #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
164 | #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 222 | #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
165 | #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 223 | #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -176,9 +234,9 @@ do { \ | |||
176 | #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) | 234 | #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) |
177 | #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) | 235 | #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) |
178 | #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) | 236 | #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) |
179 | #define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 237 | #define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
180 | #define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 238 | #define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
181 | #define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 239 | #define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
182 | #define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 240 | #define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
183 | #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 241 | #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
184 | #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 242 | #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -189,9 +247,9 @@ do { \ | |||
189 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 247 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
190 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 248 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
191 | 249 | ||
192 | #define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 250 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
193 | #define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 251 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
194 | #define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 252 | #define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
195 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 253 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
196 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 254 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
197 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 255 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -209,19 +267,19 @@ do { \ | |||
209 | #ifdef CONFIG_X86_64 | 267 | #ifdef CONFIG_X86_64 |
210 | #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 268 | #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
211 | #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 269 | #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
212 | #define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 270 | #define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
213 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 271 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
214 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 272 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
215 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 273 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
216 | 274 | ||
217 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 275 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
218 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 276 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
219 | #define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 277 | #define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
220 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 278 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
221 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 279 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
222 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 280 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
223 | 281 | ||
224 | #define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 282 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
225 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 283 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
226 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 284 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
227 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 285 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |