aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorChristoph Lameter <cl@linux.com>2014-04-07 18:39:34 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 19:36:13 -0400
commitb3ca1c10d7b32fdfdfaf5484eda486323f52d9be (patch)
tree6d00dff881bd055b650f5bc6dc85824b97ab954c /arch/x86
parent54b6a731025f9528d44945a72b1f4e5946bb2d80 (diff)
percpu: add raw_cpu_ops
The kernel has never been audited to ensure that this_cpu operations are consistently used throughout the kernel. The code generated in many places can be improved through the use of this_cpu operations (which uses a segment register for relocation of per cpu offsets instead of performing address calculations). The patch set also addresses various consistency issues in general with the per cpu macros. A. The semantics of __this_cpu_ptr() differs from this_cpu_ptr only because checks are skipped. This is typically shown through a raw_ prefix. So this patch set changes the places where __this_cpu_ptr() is used to raw_cpu_ptr(). B. There has been the long term wish by some that __this_cpu operations would check for preemption. However, there are cases where preemption checks need to be skipped. This patch set adds raw_cpu operations that do not check for preemption and then adds preemption checks to the __this_cpu operations. C. The use of __get_cpu_var is always a reference to a percpu variable that can also be handled via a this_cpu operation. This patch set replaces all uses of __get_cpu_var with this_cpu operations. D. We can then use this_cpu RMW operations in various places replacing sequences of instructions by a single one. E. The use of this_cpu operations throughout will allow other arches than x86 to implement optimized references and RMV operations to work with per cpu local data. F. The use of this_cpu operations opens up the possibility to further optimize code that relies on synchronization through per cpu data. The patch set works in a couple of stages: I. Patch 1 adds the additional raw_cpu operations and raw_cpu_ptr(). Also converts the existing __this_cpu_xx_# primitive in the x86 code to raw_cpu_xx_#. II. Patch 2-4 use the raw_cpu operations in places that would give us false positives once they are enabled. III. Patch 5 adds preemption checks to __this_cpu operations to allow checking if preemption is properly disabled when these functions are used. IV. Patches 6-20 are patches that simply replace uses of __get_cpu_var with this_cpu_ptr. They do not depend on any changes to the percpu code. No preemption tests are skipped if they are applied. V. Patches 21-46 are conversion patches that use this_cpu operations in various kernel subsystems/drivers or arch code. VI. Patches 47/48 (not included in this series) remove no longer used functions (__this_cpu_ptr and __get_cpu_var). These should only be applied after all the conversion patches have made it and after we have done additional passes through the kernel to ensure that none of the uses of these functions remain. This patch (of 46): The patches following this one will add preemption checks to __this_cpu ops so we need to have an alternative way to use this_cpu operations without preemption checks. raw_cpu_ops will be the basis for all other ops since these will be the operations that do not implement any checks. Primitive operations are renamed by this patch from __this_cpu_xxx to raw_cpu_xxxx. Also change the uses of the x86 percpu primitives in preempt.h. These depend directly on asm/percpu.h (header #include nesting issue). Signed-off-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Christoph Lameter <cl@linux.com> Acked-by: Ingo Molnar <mingo@kernel.org> Cc: Tejun Heo <tj@kernel.org> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Alex Shi <alex.shi@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Bryan Wu <cooloney@gmail.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: David Daney <david.daney@cavium.com> Cc: David Miller <davem@davemloft.net> Cc: David S. Miller <davem@davemloft.net> Cc: Dimitri Sivanich <sivanich@sgi.com> Cc: Dipankar Sarma <dipankar@in.ibm.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@linux.intel.com> Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Hedi Berriche <hedi@sgi.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: James Hogan <james.hogan@imgtec.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: John Stultz <john.stultz@linaro.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Mike Frysinger <vapier@gentoo.org> Cc: Mike Travis <travis@sgi.com> Cc: Neil Brown <neilb@suse.de> Cc: Nicolas Pitre <nicolas.pitre@linaro.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Rafael J. Wysocki <rjw@sisk.pl> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Richard Henderson <rth@twiddle.net> Cc: Robert Richter <rric@kernel.org> Cc: Russell King <linux@arm.linux.org.uk> Cc: Russell King <rmk+kernel@arm.linux.org.uk> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Wim Van Sebroeck <wim@iguana.be> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/percpu.h98
-rw-r--r--arch/x86/include/asm/preempt.h16
2 files changed, 57 insertions, 57 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 94220d14d5cc..851bcdc5db04 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -52,7 +52,7 @@
52 * Compared to the generic __my_cpu_offset version, the following 52 * Compared to the generic __my_cpu_offset version, the following
53 * saves one instruction and avoids clobbering a temp register. 53 * saves one instruction and avoids clobbering a temp register.
54 */ 54 */
55#define __this_cpu_ptr(ptr) \ 55#define raw_cpu_ptr(ptr) \
56({ \ 56({ \
57 unsigned long tcp_ptr__; \ 57 unsigned long tcp_ptr__; \
58 __verify_pcpu_ptr(ptr); \ 58 __verify_pcpu_ptr(ptr); \
@@ -362,25 +362,25 @@ do { \
362 */ 362 */
363#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) 363#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
364 364
365#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 365#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
366#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 366#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
367#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 367#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
368 368
369#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 369#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
370#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 370#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
371#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) 371#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
372#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) 372#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
373#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) 373#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
374#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) 374#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
375#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) 375#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
376#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) 376#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
377#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) 377#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
378#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) 378#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
379#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) 379#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
380#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) 380#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
381#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) 381#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
382#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) 382#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
383#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) 383#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
384 384
385#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 385#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
386#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 386#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -401,16 +401,16 @@ do { \
401#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) 401#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
402#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) 402#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
403 403
404#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 404#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
405#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 405#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
406#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 406#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
407#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 407#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
408#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 408#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
409#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 409#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
410 410
411#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 411#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
412#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 412#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
413#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 413#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
414#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 414#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
415#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 415#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
416#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 416#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
@@ -427,7 +427,7 @@ do { \
427 __ret; \ 427 __ret; \
428}) 428})
429 429
430#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double 430#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
431#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double 431#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
432#endif /* CONFIG_X86_CMPXCHG64 */ 432#endif /* CONFIG_X86_CMPXCHG64 */
433 433
@@ -436,22 +436,22 @@ do { \
436 * 32 bit must fall back to generic operations. 436 * 32 bit must fall back to generic operations.
437 */ 437 */
438#ifdef CONFIG_X86_64 438#ifdef CONFIG_X86_64
439#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 439#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
440#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 440#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
441#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 441#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
442#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 442#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
443#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 443#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
444#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) 444#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
445#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 445#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
446#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 446#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
447 447
448#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 448#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
449#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 449#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
450#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 450#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
451#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 451#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
452#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 452#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
453#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) 453#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
454#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 454#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
455#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 455#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
456 456
457/* 457/*
@@ -474,7 +474,7 @@ do { \
474 __ret; \ 474 __ret; \
475}) 475})
476 476
477#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double 477#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
478#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double 478#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
479 479
480#endif 480#endif
@@ -495,9 +495,9 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
495 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; 495 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
496 496
497#ifdef CONFIG_X86_64 497#ifdef CONFIG_X86_64
498 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0; 498 return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
499#else 499#else
500 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0; 500 return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
501#endif 501#endif
502} 502}
503 503
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index c8b051933b1b..7024c12f7bfe 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -19,12 +19,12 @@ DECLARE_PER_CPU(int, __preempt_count);
19 */ 19 */
20static __always_inline int preempt_count(void) 20static __always_inline int preempt_count(void)
21{ 21{
22 return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; 22 return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
23} 23}
24 24
25static __always_inline void preempt_count_set(int pc) 25static __always_inline void preempt_count_set(int pc)
26{ 26{
27 __this_cpu_write_4(__preempt_count, pc); 27 raw_cpu_write_4(__preempt_count, pc);
28} 28}
29 29
30/* 30/*
@@ -53,17 +53,17 @@ static __always_inline void preempt_count_set(int pc)
53 53
54static __always_inline void set_preempt_need_resched(void) 54static __always_inline void set_preempt_need_resched(void)
55{ 55{
56 __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); 56 raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
57} 57}
58 58
59static __always_inline void clear_preempt_need_resched(void) 59static __always_inline void clear_preempt_need_resched(void)
60{ 60{
61 __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); 61 raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
62} 62}
63 63
64static __always_inline bool test_preempt_need_resched(void) 64static __always_inline bool test_preempt_need_resched(void)
65{ 65{
66 return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); 66 return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
67} 67}
68 68
69/* 69/*
@@ -72,12 +72,12 @@ static __always_inline bool test_preempt_need_resched(void)
72 72
73static __always_inline void __preempt_count_add(int val) 73static __always_inline void __preempt_count_add(int val)
74{ 74{
75 __this_cpu_add_4(__preempt_count, val); 75 raw_cpu_add_4(__preempt_count, val);
76} 76}
77 77
78static __always_inline void __preempt_count_sub(int val) 78static __always_inline void __preempt_count_sub(int val)
79{ 79{
80 __this_cpu_add_4(__preempt_count, -val); 80 raw_cpu_add_4(__preempt_count, -val);
81} 81}
82 82
83/* 83/*
@@ -95,7 +95,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
95 */ 95 */
96static __always_inline bool should_resched(void) 96static __always_inline bool should_resched(void)
97{ 97{
98 return unlikely(!__this_cpu_read_4(__preempt_count)); 98 return unlikely(!raw_cpu_read_4(__preempt_count));
99} 99}
100 100
101#ifdef CONFIG_PREEMPT 101#ifdef CONFIG_PREEMPT