diff options
| author | Alexander van Heukelum <heukelum@mailshack.com> | 2008-03-11 11:17:19 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 13:21:16 -0400 |
| commit | 64970b68d2b3ed32b964b0b30b1b98518fde388e (patch) | |
| tree | 7d8eb5ea3ab1a841afa0f7ae1c65e7be4a9ca690 | |
| parent | 60b6783a044a55273b637983f52965c2808a6b86 (diff) | |
x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps
This moves an optimization for searching constant-sized small
bitmaps from x86_64-specific to generic code.
On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
changes with this applied. I have observed only four places where this
optimization avoids a call into find_next_bit:
In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
In __next_cpu a call is avoided for a 32-bit bitmap. That's it.
On x86_64, 52 locations are optimized with a minimal increase in
code size:
Current #testing defconfig:
146 x bsf, 27 x find_next_*bit
text data bss dec hex filename
5392637 846592 724424 6963653 6a41c5 vmlinux
After removing the x86_64 specific optimization for find_next_*bit:
94 x bsf, 79 x find_next_*bit
text data bss dec hex filename
5392358 846592 724424 6963374 6a40ae vmlinux
After this patch (making the optimization generic):
146 x bsf, 27 x find_next_*bit
text data bss dec hex filename
5392396 846592 724424 6963412 6a40d4 vmlinux
[ tglx@linutronix.de: build fixes ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
| -rw-r--r-- | include/asm-generic/bitops/find.h | 2 | ||||
| -rw-r--r-- | include/asm-x86/bitops.h | 6 | ||||
| -rw-r--r-- | include/asm-x86/bitops_64.h | 10 | ||||
| -rw-r--r-- | include/linux/bitops.h | 77 | ||||
| -rw-r--r-- | lib/find_next_bit.c | 25 |
5 files changed, 88 insertions, 32 deletions
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 72a51e5a12ef..1914e9742512 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h | |||
| @@ -1,11 +1,13 @@ | |||
| 1 | #ifndef _ASM_GENERIC_BITOPS_FIND_H_ | 1 | #ifndef _ASM_GENERIC_BITOPS_FIND_H_ |
| 2 | #define _ASM_GENERIC_BITOPS_FIND_H_ | 2 | #define _ASM_GENERIC_BITOPS_FIND_H_ |
| 3 | 3 | ||
| 4 | #ifndef CONFIG_GENERIC_FIND_NEXT_BIT | ||
| 4 | extern unsigned long find_next_bit(const unsigned long *addr, unsigned long | 5 | extern unsigned long find_next_bit(const unsigned long *addr, unsigned long |
| 5 | size, unsigned long offset); | 6 | size, unsigned long offset); |
| 6 | 7 | ||
| 7 | extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned | 8 | extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned |
| 8 | long size, unsigned long offset); | 9 | long size, unsigned long offset); |
| 10 | #endif | ||
| 9 | 11 | ||
| 10 | #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) | 12 | #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) |
| 11 | #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) | 13 | #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) |
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index 31e408de90c6..1ae7b270a1ef 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h | |||
| @@ -306,12 +306,6 @@ static int test_bit(int nr, const volatile unsigned long *addr); | |||
| 306 | #undef BIT_ADDR | 306 | #undef BIT_ADDR |
| 307 | #undef ADDR | 307 | #undef ADDR |
| 308 | 308 | ||
| 309 | unsigned long find_next_bit(const unsigned long *addr, | ||
| 310 | unsigned long size, unsigned long offset); | ||
| 311 | unsigned long find_next_zero_bit(const unsigned long *addr, | ||
| 312 | unsigned long size, unsigned long offset); | ||
| 313 | |||
| 314 | |||
| 315 | #ifdef CONFIG_X86_32 | 309 | #ifdef CONFIG_X86_32 |
| 316 | # include "bitops_32.h" | 310 | # include "bitops_32.h" |
| 317 | #else | 311 | #else |
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h index 65b20fb2ae78..7118ef2cc4ec 100644 --- a/include/asm-x86/bitops_64.h +++ b/include/asm-x86/bitops_64.h | |||
| @@ -15,16 +15,6 @@ static inline long __scanbit(unsigned long val, unsigned long max) | |||
| 15 | return val; | 15 | return val; |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | #define find_next_bit(addr,size,off) \ | ||
| 19 | ((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ | ||
| 20 | ((off) + (__scanbit((*(unsigned long *)addr) >> (off),(size)-(off)))) : \ | ||
| 21 | find_next_bit(addr,size,off))) | ||
| 22 | |||
| 23 | #define find_next_zero_bit(addr,size,off) \ | ||
| 24 | ((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ | ||
| 25 | ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \ | ||
| 26 | find_next_zero_bit(addr,size,off))) | ||
| 27 | |||
| 28 | #define find_first_bit(addr, size) \ | 18 | #define find_first_bit(addr, size) \ |
| 29 | ((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG \ | 19 | ((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG \ |
| 30 | ? (__scanbit(*(unsigned long *)(addr), (size))) \ | 20 | ? (__scanbit(*(unsigned long *)(addr), (size))) \ |
diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 40d54731de7e..3865f2c93bd8 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h | |||
| @@ -112,4 +112,81 @@ static inline unsigned fls_long(unsigned long l) | |||
| 112 | return fls64(l); | 112 | return fls64(l); |
| 113 | } | 113 | } |
| 114 | 114 | ||
| 115 | #ifdef __KERNEL__ | ||
| 116 | #ifdef CONFIG_GENERIC_FIND_NEXT_BIT | ||
| 117 | extern unsigned long __find_next_bit(const unsigned long *addr, | ||
| 118 | unsigned long size, unsigned long offset); | ||
| 119 | |||
| 120 | /** | ||
| 121 | * find_next_bit - find the next set bit in a memory region | ||
| 122 | * @addr: The address to base the search on | ||
| 123 | * @size: The bitmap size in bits | ||
| 124 | * @offset: The bitnumber to start searching at | ||
| 125 | */ | ||
| 126 | static __always_inline unsigned long | ||
| 127 | find_next_bit(const unsigned long *addr, unsigned long size, | ||
| 128 | unsigned long offset) | ||
| 129 | { | ||
| 130 | unsigned long value; | ||
| 131 | |||
| 132 | /* Avoid a function call if the bitmap size is a constant */ | ||
| 133 | /* and not bigger than BITS_PER_LONG. */ | ||
| 134 | |||
| 135 | /* mask off the bits below offset, then insert a sentinel at */ | ||
| 136 | /* bit size so that __ffs returns size if no set bit remains */ | ||
| 137 | if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { | ||
| 138 | value = (*addr) & ((~0ul) << offset); | ||
| 139 | value |= (1ul << size); | ||
| 140 | return __ffs(value); | ||
| 141 | } | ||
| 142 | |||
| 143 | /* a full word has no spare bit for a sentinel, and the result */ | ||
| 144 | /* of __ffs(0) is undefined, so test for zero explicitly */ | ||
| 145 | if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { | ||
| 146 | value = (*addr) & ((~0ul) << offset); | ||
| 147 | return (value == 0) ? BITS_PER_LONG : __ffs(value); | ||
| 148 | } | ||
| 149 | |||
| 150 | /* size is not constant or too big */ | ||
| 151 | return __find_next_bit(addr, size, offset); | ||
| 152 | } | ||
| 153 | |||
| 154 | extern unsigned long __find_next_zero_bit(const unsigned long *addr, | ||
| 155 | unsigned long size, unsigned long offset); | ||
| 156 | |||
| 157 | /** | ||
| 158 | * find_next_zero_bit - find the next cleared bit in a memory region | ||
| 159 | * @addr: The address to base the search on | ||
| 160 | * @size: The bitmap size in bits | ||
| 161 | * @offset: The bitnumber to start searching at | ||
| 162 | */ | ||
| 163 | static __always_inline unsigned long | ||
| 164 | find_next_zero_bit(const unsigned long *addr, unsigned long size, | ||
| 165 | unsigned long offset) | ||
| 166 | { | ||
| 167 | unsigned long value; | ||
| 168 | |||
| 169 | /* Avoid a function call if the bitmap size is a constant */ | ||
| 170 | /* and not bigger than BITS_PER_LONG. */ | ||
| 171 | |||
| 172 | /* invert the word so cleared bits become set, mask off the */ | ||
| 173 | /* bits below offset, then insert a sentinel at bit size so */ | ||
| 174 | /* that __ffs returns size if no cleared bit remains */ | ||
| 175 | if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { | ||
| 175 | value = (~(*addr)) & ((~0ul) << offset); | ||
| 176 | value |= (1ul << size); | ||
| 177 | return __ffs(value); | ||
| 178 | } | ||
| 179 | |||
| 180 | /* a full word has no spare bit for a sentinel, and the result */ | ||
| 181 | /* of __ffs(0) is undefined, so test for zero explicitly */ | ||
| 182 | if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { | ||
| 183 | value = (~(*addr)) & ((~0ul) << offset); | ||
| 184 | return (value == 0) ? BITS_PER_LONG : __ffs(value); | ||
| 185 | } | ||
| 186 | |||
| 187 | /* size is not constant or too big */ | ||
| 188 | return __find_next_zero_bit(addr, size, offset); | ||
| 189 | } | ||
| 190 | #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ | ||
| 191 | #endif /* __KERNEL__ */ | ||
| 115 | #endif | 192 | #endif |
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 5820e072b890..ce94c4c92d10 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c | |||
| @@ -15,17 +15,12 @@ | |||
| 15 | #include <asm/byteorder.h> | 15 | #include <asm/byteorder.h> |
| 16 | 16 | ||
| 17 | #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) | 17 | #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) |
| 18 | #undef find_next_bit | 18 | |
| 19 | #undef find_next_zero_bit | 19 | /* |
| 20 | 20 | * Find the next set bit in a memory region. | |
| 21 | /** | ||
| 22 | * find_next_bit - find the next set bit in a memory region | ||
| 23 | * @addr: The address to base the search on | ||
| 24 | * @offset: The bitnumber to start searching at | ||
| 25 | * @size: The maximum size to search | ||
| 26 | */ | 21 | */ |
| 27 | unsigned long find_next_bit(const unsigned long *addr, unsigned long size, | 22 | unsigned long __find_next_bit(const unsigned long *addr, |
| 28 | unsigned long offset) | 23 | unsigned long size, unsigned long offset) |
| 29 | { | 24 | { |
| 30 | const unsigned long *p = addr + BITOP_WORD(offset); | 25 | const unsigned long *p = addr + BITOP_WORD(offset); |
| 31 | unsigned long result = offset & ~(BITS_PER_LONG-1); | 26 | unsigned long result = offset & ~(BITS_PER_LONG-1); |
| @@ -62,15 +57,14 @@ found_first: | |||
| 62 | found_middle: | 57 | found_middle: |
| 63 | return result + __ffs(tmp); | 58 | return result + __ffs(tmp); |
| 64 | } | 59 | } |
| 65 | 60 | EXPORT_SYMBOL(__find_next_bit); | |
| 66 | EXPORT_SYMBOL(find_next_bit); | ||
| 67 | 61 | ||
| 68 | /* | 62 | /* |
| 69 | * This implementation of find_{first,next}_zero_bit was stolen from | 63 | * This implementation of find_{first,next}_zero_bit was stolen from |
| 70 | * Linus' asm-alpha/bitops.h. | 64 | * Linus' asm-alpha/bitops.h. |
| 71 | */ | 65 | */ |
| 72 | unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, | 66 | unsigned long __find_next_zero_bit(const unsigned long *addr, |
| 73 | unsigned long offset) | 67 | unsigned long size, unsigned long offset) |
| 74 | { | 68 | { |
| 75 | const unsigned long *p = addr + BITOP_WORD(offset); | 69 | const unsigned long *p = addr + BITOP_WORD(offset); |
| 76 | unsigned long result = offset & ~(BITS_PER_LONG-1); | 70 | unsigned long result = offset & ~(BITS_PER_LONG-1); |
| @@ -107,8 +101,7 @@ found_first: | |||
| 107 | found_middle: | 101 | found_middle: |
| 108 | return result + ffz(tmp); | 102 | return result + ffz(tmp); |
| 109 | } | 103 | } |
| 110 | 104 | EXPORT_SYMBOL(__find_next_zero_bit); | |
| 111 | EXPORT_SYMBOL(find_next_zero_bit); | ||
| 112 | 105 | ||
| 113 | #ifdef __BIG_ENDIAN | 106 | #ifdef __BIG_ENDIAN |
| 114 | 107 | ||
