diff options
author | Alexandre Oliva <aoliva@redhat.com> | 2005-10-31 15:29:36 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-11-02 22:41:32 -0500 |
commit | 06024f217d607369f0ee0071034ebb03071d5fb2 (patch) | |
tree | d38a20395a0619976543cfb68ea925aebc545575 | |
parent | cfa024f4e45562c50b9eccb23649ab103578037b (diff) |
[PATCH] x86-64: bitops fix for -Os
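This fixes the x86-64 find_[first|next]_zero_bit() functions for the
end-of-range case. They tested for a zero size in bits but not in words,
so for a size that rounds to zero words the "rep scas" would execute no
iterations, leave the flags untouched, and the following conditional jump
would do entirely the wrong thing.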
Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/x86_64/lib/bitops.c | 66 |
1 files changed, 50 insertions, 16 deletions
diff --git a/arch/x86_64/lib/bitops.c b/arch/x86_64/lib/bitops.c index a29fb75b33ac..95b6d9639fba 100644 --- a/arch/x86_64/lib/bitops.c +++ b/arch/x86_64/lib/bitops.c | |||
@@ -5,19 +5,23 @@ | |||
5 | #undef find_first_bit | 5 | #undef find_first_bit |
6 | #undef find_next_bit | 6 | #undef find_next_bit |
7 | 7 | ||
8 | /** | 8 | static inline long |
9 | * find_first_zero_bit - find the first zero bit in a memory region | 9 | __find_first_zero_bit(const unsigned long * addr, unsigned long size) |
10 | * @addr: The address to start the search at | ||
11 | * @size: The maximum size to search | ||
12 | * | ||
13 | * Returns the bit-number of the first zero bit, not the number of the byte | ||
14 | * containing a bit. | ||
15 | */ | ||
16 | inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) | ||
17 | { | 10 | { |
18 | long d0, d1, d2; | 11 | long d0, d1, d2; |
19 | long res; | 12 | long res; |
20 | 13 | ||
14 | /* | ||
15 | * We must test the size in words, not in bits, because | ||
16 | * otherwise incoming sizes in the range -63..-1 will not run | ||
17 | * any scasq instructions, and then the flags used by the je | ||
18 | * instruction will have whatever random value was in place | ||
19 | * before. Nobody should call us like that, but | ||
20 | * find_next_zero_bit() does when offset and size are at the | ||
21 | * same word and it fails to find a zero itself. | ||
22 | */ | ||
23 | size += 63; | ||
24 | size >>= 6; | ||
21 | if (!size) | 25 | if (!size) |
22 | return 0; | 26 | return 0; |
23 | asm volatile( | 27 | asm volatile( |
@@ -30,12 +34,30 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) | |||
30 | " shlq $3,%%rdi\n" | 34 | " shlq $3,%%rdi\n" |
31 | " addq %%rdi,%%rdx" | 35 | " addq %%rdi,%%rdx" |
32 | :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) | 36 | :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) |
33 | :"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL), | 37 | :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL), |
34 | [addr] "r" (addr) : "memory"); | 38 | [addr] "S" (addr) : "memory"); |
39 | /* | ||
40 | * Any register would do for [addr] above, but GCC tends to | ||
41 | * prefer rbx over rsi, even though rsi is readily available | ||
42 | * and doesn't have to be saved. | ||
43 | */ | ||
35 | return res; | 44 | return res; |
36 | } | 45 | } |
37 | 46 | ||
38 | /** | 47 | /** |
48 | * find_first_zero_bit - find the first zero bit in a memory region | ||
49 | * @addr: The address to start the search at | ||
50 | * @size: The maximum size to search | ||
51 | * | ||
52 | * Returns the bit-number of the first zero bit, not the number of the byte | ||
53 | * containing a bit. | ||
54 | */ | ||
55 | long find_first_zero_bit(const unsigned long * addr, unsigned long size) | ||
56 | { | ||
57 | return __find_first_zero_bit (addr, size); | ||
58 | } | ||
59 | |||
60 | /** | ||
39 | * find_next_zero_bit - find the first zero bit in a memory region | 61 | * find_next_zero_bit - find the first zero bit in a memory region |
40 | * @addr: The address to base the search on | 62 | * @addr: The address to base the search on |
41 | * @offset: The bitnumber to start searching at | 63 | * @offset: The bitnumber to start searching at |
@@ -43,7 +65,7 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) | |||
43 | */ | 65 | */ |
44 | long find_next_zero_bit (const unsigned long * addr, long size, long offset) | 66 | long find_next_zero_bit (const unsigned long * addr, long size, long offset) |
45 | { | 67 | { |
46 | unsigned long * p = ((unsigned long *) addr) + (offset >> 6); | 68 | const unsigned long * p = addr + (offset >> 6); |
47 | unsigned long set = 0; | 69 | unsigned long set = 0; |
48 | unsigned long res, bit = offset&63; | 70 | unsigned long res, bit = offset&63; |
49 | 71 | ||
@@ -63,8 +85,8 @@ long find_next_zero_bit (const unsigned long * addr, long size, long offset) | |||
63 | /* | 85 | /* |
64 | * No zero yet, search remaining full words for a zero | 86 | * No zero yet, search remaining full words for a zero |
65 | */ | 87 | */ |
66 | res = find_first_zero_bit ((const unsigned long *)p, | 88 | res = __find_first_zero_bit (p, size - 64 * (p - addr)); |
67 | size - 64 * (p - (unsigned long *) addr)); | 89 | |
68 | return (offset + set + res); | 90 | return (offset + set + res); |
69 | } | 91 | } |
70 | 92 | ||
@@ -74,6 +96,19 @@ __find_first_bit(const unsigned long * addr, unsigned long size) | |||
74 | long d0, d1; | 96 | long d0, d1; |
75 | long res; | 97 | long res; |
76 | 98 | ||
99 | /* | ||
100 | * We must test the size in words, not in bits, because | ||
101 | * otherwise incoming sizes in the range -63..-1 will not run | ||
102 | * any scasq instructions, and then the flags used by the jz | ||
103 | * instruction will have whatever random value was in place | ||
104 | * before. Nobody should call us like that, but | ||
105 | * find_next_bit() does when offset and size are at the same | ||
106 | * word and it fails to find a one itself. | ||
107 | */ | ||
108 | size += 63; | ||
109 | size >>= 6; | ||
110 | if (!size) | ||
111 | return 0; | ||
77 | asm volatile( | 112 | asm volatile( |
78 | " repe; scasq\n" | 113 | " repe; scasq\n" |
79 | " jz 1f\n" | 114 | " jz 1f\n" |
@@ -83,8 +118,7 @@ __find_first_bit(const unsigned long * addr, unsigned long size) | |||
83 | " shlq $3,%%rdi\n" | 118 | " shlq $3,%%rdi\n" |
84 | " addq %%rdi,%%rax" | 119 | " addq %%rdi,%%rax" |
85 | :"=a" (res), "=&c" (d0), "=&D" (d1) | 120 | :"=a" (res), "=&c" (d0), "=&D" (d1) |
86 | :"0" (0ULL), | 121 | :"0" (0ULL), "1" (size), "2" (addr), |
87 | "1" ((size + 63) >> 6), "2" (addr), | ||
88 | [addr] "r" (addr) : "memory"); | 122 | [addr] "r" (addr) : "memory"); |
89 | return res; | 123 | return res; |
90 | } | 124 | } |