aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <cl@linux.com>2010-12-14 11:28:44 -0500
committerTejun Heo <tj@kernel.org>2010-12-18 09:54:04 -0500
commit7296e08abac0a22a2534a4f6e493c764f2c77583 (patch)
tree3aa79d3046bf227b14d1f718fa097f87f6257fb9
parent2b7124428561c7c3cfa4a58cc4c6feea53f3148e (diff)
x86: this_cpu_cmpxchg and this_cpu_xchg operations
Provide support as far as the hardware capabilities of the x86 cpus allow.

Define CONFIG_CMPXCHG_LOCAL in Kconfig.cpu to allow core code to test for fast cpuops implementations.

V1->V2:
- Take out the definition for this_cpu_cmpxchg_8 and move it into a separate patch.

tj:
- Reordered ops to better follow this_cpu_* organization.
- Renamed macro temp variables similar to their existing neighbours.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/include/asm/percpu.h107
2 files changed, 109 insertions, 1 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ac9069890cd..15588a0ef466 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
310config X86_CMPXCHG 310config X86_CMPXCHG
311 def_bool X86_64 || (X86_32 && !M386) 311 def_bool X86_64 || (X86_32 && !M386)
312 312
313config CMPXCHG_LOCAL
314 def_bool X86_64 || (X86_32 && !M386)
315
313config X86_L1_CACHE_SHIFT 316config X86_L1_CACHE_SHIFT
314 int 317 int
315 default "7" if MPENTIUM4 || MPSC 318 default "7" if MPENTIUM4 || MPSC
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index dd0cd4b6a76f..b85ade511a53 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -263,6 +263,83 @@ do { \
263}) 263})
264 264
265/* 265/*
266 * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
267 * full lock semantics even though they are not needed.
268 */
/*
 * percpu_xchg_op() - exchange a per-cpu variable with a new value.
 * @var:  per-cpu lvalue; sizes 1, 2, 4 and 8 are handled, any other size
 *        ends up in __bad_percpu_size()
 * @nval: value to store into @var
 *
 * Evaluates to the value @var held before the exchange.
 *
 * xchg swaps its register operand with memory, so the previous contents
 * of @var come back in the very register that carried @nval in.  That
 * register therefore has to be the asm output: the new value is fed in
 * through the matching "0" constraint and the result is read back from
 * operand 0.  Using an unrelated "=a" output with the new value as a
 * plain input would leave the result unwritten unless the compiler
 * happened to allocate both to the same register.
 */
#define percpu_xchg_op(var, nval)					\
({									\
	typeof(var) pxo_ret__;						\
	typeof(var) pxo_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm("xchgb %0, "__percpu_arg(1)				\
			    : "=q" (pxo_ret__), "+m" (var)		\
			    : "0" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 2:								\
		asm("xchgw %0, "__percpu_arg(1)				\
			    : "=r" (pxo_ret__), "+m" (var)		\
			    : "0" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 4:								\
		asm("xchgl %0, "__percpu_arg(1)				\
			    : "=r" (pxo_ret__), "+m" (var)		\
			    : "0" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 8:								\
		asm("xchgq %0, "__percpu_arg(1)				\
			    : "=r" (pxo_ret__), "+m" (var)		\
			    : "0" (pxo_new__)				\
			    : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pxo_ret__;							\
})
302
303/*
304 * cmpxchg has no such implied lock semantics; as a result it is much
305 * more efficient for cpu local operations.
306 */
/*
 * percpu_cmpxchg_op() - compare-and-exchange on a per-cpu variable.
 * @var:  per-cpu lvalue; sizes 1, 2, 4 and 8 are handled, any other size
 *        ends up in __bad_percpu_size()
 * @oval: value @var is expected to hold
 * @nval: value to store if the expectation holds
 *
 * Evaluates to the accumulator contents after the instruction.  cmpxchg
 * compares the accumulator with memory: on a match it stores @nval, on a
 * mismatch it loads the current memory contents into the accumulator.
 * The result thus equals @oval exactly when the store took place.
 */
#define percpu_cmpxchg_op(var, oval, nval)				\
({									\
	/* accumulator: expected value going in, observed value out */	\
	typeof(var) pco_acc__ = (oval);					\
	typeof(var) pco_repl__ = (nval);				\
	switch (sizeof(var)) {						\
	case 1:								\
		asm("cmpxchgb %2, "__percpu_arg(1)			\
		    : "+a" (pco_acc__), "+m" (var)			\
		    : "q" (pco_repl__)					\
		    : "memory");					\
		break;							\
	case 2:								\
		asm("cmpxchgw %2, "__percpu_arg(1)			\
		    : "+a" (pco_acc__), "+m" (var)			\
		    : "r" (pco_repl__)					\
		    : "memory");					\
		break;							\
	case 4:								\
		asm("cmpxchgl %2, "__percpu_arg(1)			\
		    : "+a" (pco_acc__), "+m" (var)			\
		    : "r" (pco_repl__)					\
		    : "memory");					\
		break;							\
	case 8:								\
		asm("cmpxchgq %2, "__percpu_arg(1)			\
		    : "+a" (pco_acc__), "+m" (var)			\
		    : "r" (pco_repl__)					\
		    : "memory");					\
		break;							\
	default:							\
		__bad_percpu_size();					\
	}								\
	pco_acc__;							\
})
341
342/*
266 * percpu_read() makes gcc load the percpu variable every time it is 343 * percpu_read() makes gcc load the percpu variable every time it is
267 * accessed while percpu_read_stable() allows the value to be cached. 344 * accessed while percpu_read_stable() allows the value to be cached.
268 * percpu_read_stable() is more efficient and can be used if its value 345 * percpu_read_stable() is more efficient and can be used if its value
@@ -300,6 +377,12 @@ do { \
300#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) 377#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
301#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) 378#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
302#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) 379#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
/*
 * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
 * faster than an xchg with forced lock semantics.
 *
 * The 8 byte variants expand to xchgq/cmpxchgq, which only exist in 64 bit
 * mode, so they are restricted to X86_64; without these definitions a
 * 32 bit build falls back to the generic operations.
 */
#ifdef CONFIG_X86_64
#define __this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
#define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#endif
303 386
304#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 387#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
305#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 388#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -319,6 +402,11 @@ do { \
319#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) 402#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
320#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) 403#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
321#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) 404#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
/* Preempt-safe exchange: a single xchg instruction for sizes 1, 2 and 4. */
#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
/*
 * The 8 byte forms expand to xchgq/cmpxchgq, which only exist in 64 bit
 * mode; without these definitions a 32 bit build falls back to the
 * generic operations.
 */
#ifdef CONFIG_X86_64
#define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#endif
322 410
323#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) 411#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
324#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) 412#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
@@ -332,15 +420,32 @@ do { \
332#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) 420#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
333#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) 421#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
334#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) 422#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
/* Interrupt-safe exchange: a single xchg instruction for sizes 1, 2 and 4. */
#define irqsafe_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
/*
 * The 8 byte forms expand to xchgq/cmpxchgq, which only exist in 64 bit
 * mode; without these definitions a 32 bit build falls back to the
 * generic operations.
 */
#ifdef CONFIG_X86_64
#define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#endif
335 428
336#ifndef CONFIG_M386 429#ifndef CONFIG_M386
337#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 430#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
338#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 431#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
339#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 432#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
433#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
434#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
435#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
436
340#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 437#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
341#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 438#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
342#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 439#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
343#endif 440#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
441#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
442#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
443
444#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
445#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
446#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
447#endif /* !CONFIG_M386 */
448
344/* 449/*
345 * Per cpu atomic 64 bit operations are only available under 64 bit. 450 * Per cpu atomic 64 bit operations are only available under 64 bit.
346 * 32 bit must fall back to generic operations. 451 * 32 bit must fall back to generic operations.