diff options
author | Alexander van Heukelum <heukelum@mailshack.com> | 2008-03-11 11:17:19 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 13:21:16 -0400 |
commit | 64970b68d2b3ed32b964b0b30b1b98518fde388e (patch) | |
tree | 7d8eb5ea3ab1a841afa0f7ae1c65e7be4a9ca690 /include/linux/bitops.h | |
parent | 60b6783a044a55273b637983f52965c2808a6b86 (diff) |
x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps
This moves an optimization for searching constant-sized small
bitmaps from x86_64-specific to generic code.
On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
changes with this applied. I have observed only four places where this
optimization avoids a call into find_next_bit:
In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
In __next_cpu a call is avoided for a 32-bit bitmap. That's it.
On x86_64, 52 locations are optimized with a minimal increase in
code size:
Current #testing defconfig:
146 x bsf, 27 x find_next_*bit
text data bss dec hex filename
5392637 846592 724424 6963653 6a41c5 vmlinux
After removing the x86_64 specific optimization for find_next_*bit:
94 x bsf, 79 x find_next_*bit
text data bss dec hex filename
5392358 846592 724424 6963374 6a40ae vmlinux
After this patch (making the optimization generic):
146 x bsf, 27 x find_next_*bit
text data bss dec hex filename
5392396 846592 724424 6963412 6a40d4 vmlinux
[ tglx@linutronix.de: build fixes ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include/linux/bitops.h')
-rw-r--r-- | include/linux/bitops.h | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 40d54731de7e..3865f2c93bd8 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h | |||
@@ -112,4 +112,81 @@ static inline unsigned fls_long(unsigned long l) | |||
112 | return fls64(l); | 112 | return fls64(l); |
113 | } | 113 | } |
114 | 114 | ||
115 | #ifdef __KERNEL__ | ||
116 | #ifdef CONFIG_GENERIC_FIND_NEXT_BIT | ||
117 | extern unsigned long __find_next_bit(const unsigned long *addr, | ||
118 | unsigned long size, unsigned long offset); | ||
119 | |||
120 | /** | ||
121 | * find_next_bit - find the next set bit in a memory region | ||
122 | * @addr: The address to base the search on | ||
123 | * @offset: The bitnumber to start searching at | ||
124 | * @size: The bitmap size in bits | ||
125 | */ | ||
126 | static __always_inline unsigned long | ||
127 | find_next_bit(const unsigned long *addr, unsigned long size, | ||
128 | unsigned long offset) | ||
129 | { | ||
130 | unsigned long value; | ||
131 | |||
132 | /* Avoid a function call if the bitmap size is a constant */ | ||
133 | /* and not bigger than BITS_PER_LONG. */ | ||
134 | |||
135 | /* insert a sentinel so that __ffs returns size if there */ | ||
136 | /* are no set bits in the bitmap */ | ||
137 | if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { | ||
138 | value = (*addr) & ((~0ul) << offset); | ||
139 | value |= (1ul << size); | ||
140 | return __ffs(value); | ||
141 | } | ||
142 | |||
143 | /* the result of __ffs(0) is undefined, so it needs to be */ | ||
144 | /* handled separately */ | ||
145 | if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { | ||
146 | value = (*addr) & ((~0ul) << offset); | ||
147 | return (value == 0) ? BITS_PER_LONG : __ffs(value); | ||
148 | } | ||
149 | |||
150 | /* size is not constant or too big */ | ||
151 | return __find_next_bit(addr, size, offset); | ||
152 | } | ||
153 | |||
154 | extern unsigned long __find_next_zero_bit(const unsigned long *addr, | ||
155 | unsigned long size, unsigned long offset); | ||
156 | |||
157 | /** | ||
158 | * find_next_zero_bit - find the next cleared bit in a memory region | ||
159 | * @addr: The address to base the search on | ||
160 | * @offset: The bitnumber to start searching at | ||
161 | * @size: The bitmap size in bits | ||
162 | */ | ||
163 | static __always_inline unsigned long | ||
164 | find_next_zero_bit(const unsigned long *addr, unsigned long size, | ||
165 | unsigned long offset) | ||
166 | { | ||
167 | unsigned long value; | ||
168 | |||
169 | /* Avoid a function call if the bitmap size is a constant */ | ||
170 | /* and not bigger than BITS_PER_LONG. */ | ||
171 | |||
172 | /* insert a sentinel so that __ffs returns size if there */ | ||
173 | /* are no set bits in the bitmap */ | ||
174 | if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { | ||
175 | value = (~(*addr)) & ((~0ul) << offset); | ||
176 | value |= (1ul << size); | ||
177 | return __ffs(value); | ||
178 | } | ||
179 | |||
180 | /* the result of __ffs(0) is undefined, so it needs to be */ | ||
181 | /* handled separately */ | ||
182 | if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { | ||
183 | value = (~(*addr)) & ((~0ul) << offset); | ||
184 | return (value == 0) ? BITS_PER_LONG : __ffs(value); | ||
185 | } | ||
186 | |||
187 | /* size is not constant or too big */ | ||
188 | return __find_next_zero_bit(addr, size, offset); | ||
189 | } | ||
190 | #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ | ||
191 | #endif /* __KERNEL__ */ | ||
115 | #endif | 192 | #endif |