diff options
author | Alexander van Heukelum <heukelum@mailshack.com> | 2008-03-09 16:01:04 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 13:21:16 -0400 |
commit | 6fd92b63d0626a8fe7eb8e2e50d19ecaa18cb412 (patch) | |
tree | 840f6cf998c251cdfcae37a03f34b34ac5bea3a5 /arch | |
parent | 18e413f7193ed2f6991d959863f46330813aa242 (diff) |
x86: change x86 to use generic find_next_bit
The versions with inline assembly are in fact slower on the machines I
tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD
Opteron 270). The i386-version needed a fix similar to 06024f21 to avoid
crashing the benchmark.
Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size
1...512, for each possible bitmap with one bit set, for each possible
offset: find the position of the first bit starting at offset. If you
follow ;). Times include setup of the bitmap and checking of the
results.
Athlon Xeon Opteron 32/64bit
x86-specific: 0m3.692s 0m2.820s 0m3.196s / 0m2.480s
generic: 0m2.622s 0m1.662s 0m2.100s / 0m1.572s
If the bitmap size is not a multiple of BITS_PER_LONG, and no set
(cleared) bit is found, find_next_bit (find_next_zero_bit) returns a
value outside of the range [0, size]. The generic version always returns
exactly size. The generic version also uses unsigned long everywhere,
while the x86 versions use a mishmash of int, unsigned (int), long and
unsigned long.
Using the generic version does give a slightly bigger kernel, though.
defconfig: text data bss dec hex filename
x86-specific: 4738555 481232 626688 5846475 5935cb vmlinux (32 bit)
generic: 4738621 481232 626688 5846541 59360d vmlinux (32 bit)
x86-specific: 5392395 846568 724424 6963387 6a40bb vmlinux (64 bit)
generic: 5392458 846568 724424 6963450 6a40fa vmlinux (64 bit)
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/lib/bitops_32.c | 70 | ||||
-rw-r--r-- | arch/x86/lib/bitops_64.c | 68 |
4 files changed, 4 insertions, 139 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fadf794483d..5639de47ed45 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -77,6 +77,9 @@ config GENERIC_BUG | |||
77 | def_bool y | 77 | def_bool y |
78 | depends on BUG | 78 | depends on BUG |
79 | 79 | ||
80 | config GENERIC_FIND_NEXT_BIT | ||
81 | def_bool y | ||
82 | |||
80 | config GENERIC_HWEIGHT | 83 | config GENERIC_HWEIGHT |
81 | def_bool y | 84 | def_bool y |
82 | 85 | ||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 25df1c1989fe..436093299bd3 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o | |||
11 | ifeq ($(CONFIG_X86_32),y) | 11 | ifeq ($(CONFIG_X86_32),y) |
12 | lib-y += checksum_32.o | 12 | lib-y += checksum_32.o |
13 | lib-y += strstr_32.o | 13 | lib-y += strstr_32.o |
14 | lib-y += bitops_32.o semaphore_32.o string_32.o | 14 | lib-y += semaphore_32.o string_32.o |
15 | 15 | ||
16 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 16 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
17 | else | 17 | else |
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c deleted file mode 100644 index b65440459859..000000000000 --- a/arch/x86/lib/bitops_32.c +++ /dev/null | |||
@@ -1,70 +0,0 @@ | |||
1 | #include <linux/bitops.h> | ||
2 | #include <linux/module.h> | ||
3 | |||
4 | /** | ||
5 | * find_next_bit - find the next set bit in a memory region | ||
6 | * @addr: The address to base the search on | ||
7 | * @offset: The bitnumber to start searching at | ||
8 | * @size: The maximum size to search | ||
9 | */ | ||
10 | int find_next_bit(const unsigned long *addr, int size, int offset) | ||
11 | { | ||
12 | const unsigned long *p = addr + (offset >> 5); | ||
13 | int set = 0, bit = offset & 31, res; | ||
14 | |||
15 | if (bit) { | ||
16 | /* | ||
17 | * Look for nonzero in the first 32 bits: | ||
18 | */ | ||
19 | __asm__("bsfl %1,%0\n\t" | ||
20 | "jne 1f\n\t" | ||
21 | "movl $32, %0\n" | ||
22 | "1:" | ||
23 | : "=r" (set) | ||
24 | : "r" (*p >> bit)); | ||
25 | if (set < (32 - bit)) | ||
26 | return set + offset; | ||
27 | set = 32 - bit; | ||
28 | p++; | ||
29 | } | ||
30 | /* | ||
31 | * No set bit yet, search remaining full words for a bit | ||
32 | */ | ||
33 | res = find_first_bit (p, size - 32 * (p - addr)); | ||
34 | return (offset + set + res); | ||
35 | } | ||
36 | EXPORT_SYMBOL(find_next_bit); | ||
37 | |||
38 | /** | ||
39 | * find_next_zero_bit - find the first zero bit in a memory region | ||
40 | * @addr: The address to base the search on | ||
41 | * @offset: The bitnumber to start searching at | ||
42 | * @size: The maximum size to search | ||
43 | */ | ||
44 | int find_next_zero_bit(const unsigned long *addr, int size, int offset) | ||
45 | { | ||
46 | const unsigned long *p = addr + (offset >> 5); | ||
47 | int set = 0, bit = offset & 31, res; | ||
48 | |||
49 | if (bit) { | ||
50 | /* | ||
51 | * Look for zero in the first 32 bits. | ||
52 | */ | ||
53 | __asm__("bsfl %1,%0\n\t" | ||
54 | "jne 1f\n\t" | ||
55 | "movl $32, %0\n" | ||
56 | "1:" | ||
57 | : "=r" (set) | ||
58 | : "r" (~(*p >> bit))); | ||
59 | if (set < (32 - bit)) | ||
60 | return set + offset; | ||
61 | set = 32 - bit; | ||
62 | p++; | ||
63 | } | ||
64 | /* | ||
65 | * No zero yet, search remaining full bytes for a zero | ||
66 | */ | ||
67 | res = find_first_zero_bit(p, size - 32 * (p - addr)); | ||
68 | return (offset + set + res); | ||
69 | } | ||
70 | EXPORT_SYMBOL(find_next_zero_bit); | ||
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c index 0e8f491e6ccc..0eeb704d2513 100644 --- a/arch/x86/lib/bitops_64.c +++ b/arch/x86/lib/bitops_64.c | |||
@@ -1,9 +1,7 @@ | |||
1 | #include <linux/bitops.h> | 1 | #include <linux/bitops.h> |
2 | 2 | ||
3 | #undef find_first_zero_bit | 3 | #undef find_first_zero_bit |
4 | #undef find_next_zero_bit | ||
5 | #undef find_first_bit | 4 | #undef find_first_bit |
6 | #undef find_next_bit | ||
7 | 5 | ||
8 | static inline long | 6 | static inline long |
9 | __find_first_zero_bit(const unsigned long * addr, unsigned long size) | 7 | __find_first_zero_bit(const unsigned long * addr, unsigned long size) |
@@ -57,39 +55,6 @@ long find_first_zero_bit(const unsigned long * addr, unsigned long size) | |||
57 | return __find_first_zero_bit (addr, size); | 55 | return __find_first_zero_bit (addr, size); |
58 | } | 56 | } |
59 | 57 | ||
60 | /** | ||
61 | * find_next_zero_bit - find the next zero bit in a memory region | ||
62 | * @addr: The address to base the search on | ||
63 | * @offset: The bitnumber to start searching at | ||
64 | * @size: The maximum size to search | ||
65 | */ | ||
66 | long find_next_zero_bit (const unsigned long * addr, long size, long offset) | ||
67 | { | ||
68 | const unsigned long * p = addr + (offset >> 6); | ||
69 | unsigned long set = 0; | ||
70 | unsigned long res, bit = offset&63; | ||
71 | |||
72 | if (bit) { | ||
73 | /* | ||
74 | * Look for zero in first word | ||
75 | */ | ||
76 | asm("bsfq %1,%0\n\t" | ||
77 | "cmoveq %2,%0" | ||
78 | : "=r" (set) | ||
79 | : "r" (~(*p >> bit)), "r"(64L)); | ||
80 | if (set < (64 - bit)) | ||
81 | return set + offset; | ||
82 | set = 64 - bit; | ||
83 | p++; | ||
84 | } | ||
85 | /* | ||
86 | * No zero yet, search remaining full words for a zero | ||
87 | */ | ||
88 | res = __find_first_zero_bit (p, size - 64 * (p - addr)); | ||
89 | |||
90 | return (offset + set + res); | ||
91 | } | ||
92 | |||
93 | static inline long | 58 | static inline long |
94 | __find_first_bit(const unsigned long * addr, unsigned long size) | 59 | __find_first_bit(const unsigned long * addr, unsigned long size) |
95 | { | 60 | { |
@@ -136,40 +101,7 @@ long find_first_bit(const unsigned long * addr, unsigned long size) | |||
136 | return __find_first_bit(addr,size); | 101 | return __find_first_bit(addr,size); |
137 | } | 102 | } |
138 | 103 | ||
139 | /** | ||
140 | * find_next_bit - find the first set bit in a memory region | ||
141 | * @addr: The address to base the search on | ||
142 | * @offset: The bitnumber to start searching at | ||
143 | * @size: The maximum size to search | ||
144 | */ | ||
145 | long find_next_bit(const unsigned long * addr, long size, long offset) | ||
146 | { | ||
147 | const unsigned long * p = addr + (offset >> 6); | ||
148 | unsigned long set = 0, bit = offset & 63, res; | ||
149 | |||
150 | if (bit) { | ||
151 | /* | ||
152 | * Look for nonzero in the first 64 bits: | ||
153 | */ | ||
154 | asm("bsfq %1,%0\n\t" | ||
155 | "cmoveq %2,%0\n\t" | ||
156 | : "=r" (set) | ||
157 | : "r" (*p >> bit), "r" (64L)); | ||
158 | if (set < (64 - bit)) | ||
159 | return set + offset; | ||
160 | set = 64 - bit; | ||
161 | p++; | ||
162 | } | ||
163 | /* | ||
164 | * No set bit yet, search remaining full words for a bit | ||
165 | */ | ||
166 | res = __find_first_bit (p, size - 64 * (p - addr)); | ||
167 | return (offset + set + res); | ||
168 | } | ||
169 | |||
170 | #include <linux/module.h> | 104 | #include <linux/module.h> |
171 | 105 | ||
172 | EXPORT_SYMBOL(find_next_bit); | ||
173 | EXPORT_SYMBOL(find_first_bit); | 106 | EXPORT_SYMBOL(find_first_bit); |
174 | EXPORT_SYMBOL(find_first_zero_bit); | 107 | EXPORT_SYMBOL(find_first_zero_bit); |
175 | EXPORT_SYMBOL(find_next_zero_bit); | ||