aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-06-19 00:03:26 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-06-19 07:45:51 -0400
commit 1a750e0cd7a30c478723ecfa1df685efcdd38a90 (patch)
tree 9c0326febfd0124cca3b35ebef9d5b4f85fbaeee
parent 5136dea5734cfddbc6d7ccb7ead85a3ac7ce3de2 (diff)
x86, bitops: make constant-bit set/clear_bit ops faster
On Wed, 18 Jun 2008, Linus Torvalds wrote:
>
> And yes, the "lock andl" should be noticeably faster than the xchgl.

I dunno. Here's an untested (!!) patch that turns constant-bit set/clear_bit ops into byte mask ops (lock orb/andb).

It's not exactly pretty. The reason for using the byte versions is that a locked op is serialized in the memory pipeline anyway, so there are no forwarding issues (that could slow down things when we access things with different sizes), and the byte ops are a lot smaller than 32-bit and particularly 64-bit ops (big constants, and the 64-bit ops need the REX prefix byte too).

[ Side note: I wonder if we should turn the "test_bit()" C version into a "char *" version too.. It could actually help with alias analysis, since char pointers can alias anything. So it might be the RightThing(tm) to do for multiple reasons. I dunno. It's a separate issue. ]

It does actually shrink the kernel image a bit (a couple of hundred bytes on the text segment for my everything-compiled-in image), and while it's totally untested the (admittedly few) code generation points I looked at seemed sane. And "lock orb" should be noticeably faster than "lock bts".

If somebody wants to play with it, go wild. I didn't do "change_bit()", because nobody sane uses that thing anyway. I guarantee nothing. And if it breaks, nobody saw me do anything. You can't prove this email wasn't sent by somebody who is good at forging smtp.

This does require a gcc that is recent enough for "__builtin_constant_p()" to work in an inline function, but I suspect our kernel requirements are already higher than that. And if you do have an old gcc that is supported, the worst that would happen is that the optimization doesn't trigger.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- include/asm-x86/bitops.h | 28
1 files changed, 23 insertions, 5 deletions
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 7d2494bdc660..ab7635a4acd9 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -23,11 +23,22 @@
23#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) 23#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
24/* Technically wrong, but this avoids compilation errors on some gcc 24/* Technically wrong, but this avoids compilation errors on some gcc
25 versions. */ 25 versions. */
26#define ADDR "=m" (*(volatile long *) addr) 26#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
27#else 27#else
28#define ADDR "+m" (*(volatile long *) addr) 28#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
29#endif 29#endif
30 30
31#define ADDR BITOP_ADDR(addr)
32
33/*
34 * We do the locked ops that don't return the old value as
35 * a mask operation on a byte.
36 */
37#define IS_IMMEDIATE(nr) \
38 (__builtin_constant_p(nr))
39#define CONST_MASK_ADDR BITOP_ADDR(addr + (nr>>3))
40#define CONST_MASK (1 << (nr & 7))
41
31/** 42/**
32 * set_bit - Atomically set a bit in memory 43 * set_bit - Atomically set a bit in memory
33 * @nr: the bit to set 44 * @nr: the bit to set
@@ -43,11 +54,15 @@
43 * Note that @nr may be almost arbitrarily large; this function is not 54 * Note that @nr may be almost arbitrarily large; this function is not
44 * restricted to acting on a single-word quantity. 55 * restricted to acting on a single-word quantity.
45 */ 56 */
46static inline void set_bit(int nr, volatile unsigned long *addr) 57static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
47{ 58{
48 asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); 59 if (IS_IMMEDIATE(nr))
60 asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR : "i" (CONST_MASK) : "memory");
61 else
62 asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory");
49} 63}
50 64
65
51/** 66/**
52 * __set_bit - Set a bit in memory 67 * __set_bit - Set a bit in memory
53 * @nr: the bit to set 68 * @nr: the bit to set
@@ -74,7 +89,10 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
74 */ 89 */
75static inline void clear_bit(int nr, volatile unsigned long *addr) 90static inline void clear_bit(int nr, volatile unsigned long *addr)
76{ 91{
77 asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); 92 if (IS_IMMEDIATE(nr))
93 asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR : "i" (~CONST_MASK));
94 else
95 asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr));
78} 96}
79 97
80/* 98/*