aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYury Norov <yury.norov@gmail.com>2015-04-16 15:43:13 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-17 09:03:53 -0400
commit2c57a0e233d72f8c2e2404560dcf0188ac3cf5d7 (patch)
treeee9ddd0fd0eb38b8dfde0f20bd380b0453d553bd
parent396ada68acefc4f90cf1f05d4275913834af5d93 (diff)
lib: find_*_bit reimplementation
This patchset does rework to find_bit function family to achieve better performance, and decrease size of text. All rework is done in patch 1. Patches 2 and 3 are about code moving and renaming. It was boot-tested on x86_64 and MIPS (big-endian) machines. Performance tests were ran on userspace with code like this: /* addr[] is filled from /dev/urandom */ start = clock(); while (ret < nbits) ret = find_next_bit(addr, nbits, ret + 1); end = clock(); printf("%ld\t", (unsigned long) end - start); On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz measurements are: (for find_next_bit, nbits is 8M, for find_first_bit - 80K) find_next_bit: find_first_bit: new current new current 26932 43151 14777 14925 26947 43182 14521 15423 26507 43824 15053 14705 27329 43759 14473 14777 26895 43367 14847 15023 26990 43693 15103 15163 26775 43299 15067 15232 27282 42752 14544 15121 27504 43088 14644 14858 26761 43856 14699 15193 26692 43075 14781 14681 27137 42969 14451 15061 ... ... find_next_bit performance gain is 35-40%; find_first_bit - no measurable difference. On ARM machine, there is arch-specific implementation for find_bit. Thanks a lot to George Spelvin and Rasmus Villemoes for hints and helpful discussions. This patch (of 3): New implementations takes less space in source file (see diffstat) and in object. For me it's 710 vs 453 bytes of text. It also shows better performance. find_last_bit description fixed due to obvious typo. [akpm@linux-foundation.org: include linux/bitmap.h, per Rasmus] Signed-off-by: Yury Norov <yury.norov@gmail.com> Reviewed-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> Reviewed-by: George Spelvin <linux@horizon.com> Cc: Alexey Klimov <klimov.linux@gmail.com> Cc: David S. Miller <davem@davemloft.net> Cc: Daniel Borkmann <dborkman@redhat.com> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Mark Salter <msalter@redhat.com> Cc: AKASHI Takahiro <takahiro.akashi@linaro.org> Cc: Thomas Graf <tgraf@suug.ch> Cc: Valentin Rothberg <valentinrothberg@gmail.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/bitops.h4
-rw-r--r--lib/find_last_bit.c36
-rw-r--r--lib/find_next_bit.c267
3 files changed, 91 insertions, 216 deletions
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5d858e02997f..297f5bda4fdf 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word)
218/** 218/**
219 * find_last_bit - find the last set bit in a memory region 219 * find_last_bit - find the last set bit in a memory region
220 * @addr: The address to start the search at 220 * @addr: The address to start the search at
221 * @size: The maximum size to search 221 * @size: The number of bits to search
222 * 222 *
223 * Returns the bit number of the first set bit, or size. 223 * Returns the bit number of the last set bit, or size.
224 */ 224 */
225extern unsigned long find_last_bit(const unsigned long *addr, 225extern unsigned long find_last_bit(const unsigned long *addr,
226 unsigned long size); 226 unsigned long size);
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index 91ca09fbf6f9..3e3be40c6a6e 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -4,6 +4,9 @@
4 * Written by Rusty Russell <rusty@rustcorp.com.au> 4 * Written by Rusty Russell <rusty@rustcorp.com.au>
5 * (Inspired by David Howell's find_next_bit implementation) 5 * (Inspired by David Howell's find_next_bit implementation)
6 * 6 *
7 * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
8 * size and improve performance, 2015.
9 *
7 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 11 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 12 * as published by the Free Software Foundation; either version
@@ -11,37 +14,26 @@
11 */ 14 */
12 15
13#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/bitmap.h>
14#include <linux/export.h> 18#include <linux/export.h>
15#include <asm/types.h> 19#include <linux/kernel.h>
16#include <asm/byteorder.h>
17 20
18#ifndef find_last_bit 21#ifndef find_last_bit
19 22
20unsigned long find_last_bit(const unsigned long *addr, unsigned long size) 23unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
21{ 24{
22 unsigned long words; 25 if (size) {
23 unsigned long tmp; 26 unsigned long val = BITMAP_LAST_WORD_MASK(size);
24 27 unsigned long idx = (size-1) / BITS_PER_LONG;
25 /* Start at final word. */
26 words = size / BITS_PER_LONG;
27 28
28 /* Partial final word? */ 29 do {
29 if (size & (BITS_PER_LONG-1)) { 30 val &= addr[idx];
30 tmp = (addr[words] & (~0UL >> (BITS_PER_LONG 31 if (val)
31 - (size & (BITS_PER_LONG-1))))); 32 return idx * BITS_PER_LONG + __fls(val);
32 if (tmp)
33 goto found;
34 }
35 33
36 while (words) { 34 val = ~0ul;
37 tmp = addr[--words]; 35 } while (idx--);
38 if (tmp) {
39found:
40 return words * BITS_PER_LONG + __fls(tmp);
41 }
42 } 36 }
43
44 /* Not found */
45 return size; 37 return size;
46} 38}
47EXPORT_SYMBOL(find_last_bit); 39EXPORT_SYMBOL(find_last_bit);
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index 0cbfc0b4398f..cbea5ef843aa 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -3,6 +3,9 @@
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
7 * size and improve performance, 2015.
8 *
6 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 11 * as published by the Free Software Foundation; either version
@@ -11,98 +14,58 @@
11 14
12#include <linux/bitops.h> 15#include <linux/bitops.h>
13#include <linux/export.h> 16#include <linux/export.h>
14#include <asm/types.h> 17#include <linux/kernel.h>
15#include <asm/byteorder.h>
16 18
17#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) 19#if !defined(find_next_bit) || !defined(find_next_zero_bit)
18 20
19#ifndef find_next_bit
20/* 21/*
21 * Find the next set bit in a memory region. 22 * This is a common helper function for find_next_bit and
23 * find_next_zero_bit. The difference is the "invert" argument, which
24 * is XORed with each fetched word before searching it for one bits.
22 */ 25 */
23unsigned long find_next_bit(const unsigned long *addr, unsigned long size, 26static unsigned long _find_next_bit(const unsigned long *addr,
24 unsigned long offset) 27 unsigned long nbits, unsigned long start, unsigned long invert)
25{ 28{
26 const unsigned long *p = addr + BITOP_WORD(offset);
27 unsigned long result = offset & ~(BITS_PER_LONG-1);
28 unsigned long tmp; 29 unsigned long tmp;
29 30
30 if (offset >= size) 31 if (!nbits || start >= nbits)
31 return size; 32 return nbits;
32 size -= result; 33
33 offset %= BITS_PER_LONG; 34 tmp = addr[start / BITS_PER_LONG] ^ invert;
34 if (offset) { 35
35 tmp = *(p++); 36 /* Handle 1st word. */
36 tmp &= (~0UL << offset); 37 tmp &= BITMAP_FIRST_WORD_MASK(start);
37 if (size < BITS_PER_LONG) 38 start = round_down(start, BITS_PER_LONG);
38 goto found_first; 39
39 if (tmp) 40 while (!tmp) {
40 goto found_middle; 41 start += BITS_PER_LONG;
41 size -= BITS_PER_LONG; 42 if (start >= nbits)
42 result += BITS_PER_LONG; 43 return nbits;
43 } 44
44 while (size & ~(BITS_PER_LONG-1)) { 45 tmp = addr[start / BITS_PER_LONG] ^ invert;
45 if ((tmp = *(p++)))
46 goto found_middle;
47 result += BITS_PER_LONG;
48 size -= BITS_PER_LONG;
49 } 46 }
50 if (!size)
51 return result;
52 tmp = *p;
53 47
54found_first: 48 return min(start + __ffs(tmp), nbits);
55 tmp &= (~0UL >> (BITS_PER_LONG - size));
56 if (tmp == 0UL) /* Are any bits set? */
57 return result + size; /* Nope. */
58found_middle:
59 return result + __ffs(tmp);
60} 49}
61EXPORT_SYMBOL(find_next_bit);
62#endif 50#endif
63 51
64#ifndef find_next_zero_bit 52#ifndef find_next_bit
65/* 53/*
66 * This implementation of find_{first,next}_zero_bit was stolen from 54 * Find the next set bit in a memory region.
67 * Linus' asm-alpha/bitops.h.
68 */ 55 */
56unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
57 unsigned long offset)
58{
59 return _find_next_bit(addr, size, offset, 0UL);
60}
61EXPORT_SYMBOL(find_next_bit);
62#endif
63
64#ifndef find_next_zero_bit
69unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, 65unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
70 unsigned long offset) 66 unsigned long offset)
71{ 67{
72 const unsigned long *p = addr + BITOP_WORD(offset); 68 return _find_next_bit(addr, size, offset, ~0UL);
73 unsigned long result = offset & ~(BITS_PER_LONG-1);
74 unsigned long tmp;
75
76 if (offset >= size)
77 return size;
78 size -= result;
79 offset %= BITS_PER_LONG;
80 if (offset) {
81 tmp = *(p++);
82 tmp |= ~0UL >> (BITS_PER_LONG - offset);
83 if (size < BITS_PER_LONG)
84 goto found_first;
85 if (~tmp)
86 goto found_middle;
87 size -= BITS_PER_LONG;
88 result += BITS_PER_LONG;
89 }
90 while (size & ~(BITS_PER_LONG-1)) {
91 if (~(tmp = *(p++)))
92 goto found_middle;
93 result += BITS_PER_LONG;
94 size -= BITS_PER_LONG;
95 }
96 if (!size)
97 return result;
98 tmp = *p;
99
100found_first:
101 tmp |= ~0UL << size;
102 if (tmp == ~0UL) /* Are any bits zero? */
103 return result + size; /* Nope. */
104found_middle:
105 return result + ffz(tmp);
106} 69}
107EXPORT_SYMBOL(find_next_zero_bit); 70EXPORT_SYMBOL(find_next_zero_bit);
108#endif 71#endif
@@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
113 */ 76 */
114unsigned long find_first_bit(const unsigned long *addr, unsigned long size) 77unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
115{ 78{
116 const unsigned long *p = addr; 79 unsigned long idx;
117 unsigned long result = 0;
118 unsigned long tmp;
119 80
120 while (size & ~(BITS_PER_LONG-1)) { 81 for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
121 if ((tmp = *(p++))) 82 if (addr[idx])
122 goto found; 83 return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
123 result += BITS_PER_LONG;
124 size -= BITS_PER_LONG;
125 } 84 }
126 if (!size)
127 return result;
128 85
129 tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); 86 return size;
130 if (tmp == 0UL) /* Are any bits set? */
131 return result + size; /* Nope. */
132found:
133 return result + __ffs(tmp);
134} 87}
135EXPORT_SYMBOL(find_first_bit); 88EXPORT_SYMBOL(find_first_bit);
136#endif 89#endif
@@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
141 */ 94 */
142unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) 95unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
143{ 96{
144 const unsigned long *p = addr; 97 unsigned long idx;
145 unsigned long result = 0;
146 unsigned long tmp;
147 98
148 while (size & ~(BITS_PER_LONG-1)) { 99 for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
149 if (~(tmp = *(p++))) 100 if (addr[idx] != ~0UL)
150 goto found; 101 return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
151 result += BITS_PER_LONG;
152 size -= BITS_PER_LONG;
153 } 102 }
154 if (!size)
155 return result;
156 103
157 tmp = (*p) | (~0UL << size); 104 return size;
158 if (tmp == ~0UL) /* Are any bits zero? */
159 return result + size; /* Nope. */
160found:
161 return result + ffz(tmp);
162} 105}
163EXPORT_SYMBOL(find_first_zero_bit); 106EXPORT_SYMBOL(find_first_zero_bit);
164#endif 107#endif
@@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
166#ifdef __BIG_ENDIAN 109#ifdef __BIG_ENDIAN
167 110
168/* include/linux/byteorder does not support "unsigned long" type */ 111/* include/linux/byteorder does not support "unsigned long" type */
169static inline unsigned long ext2_swabp(const unsigned long * x)
170{
171#if BITS_PER_LONG == 64
172 return (unsigned long) __swab64p((u64 *) x);
173#elif BITS_PER_LONG == 32
174 return (unsigned long) __swab32p((u32 *) x);
175#else
176#error BITS_PER_LONG not defined
177#endif
178}
179
180/* include/linux/byteorder doesn't support "unsigned long" type */
181static inline unsigned long ext2_swab(const unsigned long y) 112static inline unsigned long ext2_swab(const unsigned long y)
182{ 113{
183#if BITS_PER_LONG == 64 114#if BITS_PER_LONG == 64
@@ -189,48 +120,38 @@ static inline unsigned long ext2_swab(const unsigned long y)
189#endif 120#endif
190} 121}
191 122
192#ifndef find_next_zero_bit_le 123#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
193unsigned long find_next_zero_bit_le(const void *addr, unsigned 124static unsigned long _find_next_bit_le(const unsigned long *addr,
194 long size, unsigned long offset) 125 unsigned long nbits, unsigned long start, unsigned long invert)
195{ 126{
196 const unsigned long *p = addr;
197 unsigned long result = offset & ~(BITS_PER_LONG - 1);
198 unsigned long tmp; 127 unsigned long tmp;
199 128
200 if (offset >= size) 129 if (!nbits || start >= nbits)
201 return size; 130 return nbits;
202 p += BITOP_WORD(offset); 131
203 size -= result; 132 tmp = addr[start / BITS_PER_LONG] ^ invert;
204 offset &= (BITS_PER_LONG - 1UL); 133
205 if (offset) { 134 /* Handle 1st word. */
206 tmp = ext2_swabp(p++); 135 tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
207 tmp |= (~0UL >> (BITS_PER_LONG - offset)); 136 start = round_down(start, BITS_PER_LONG);
208 if (size < BITS_PER_LONG)
209 goto found_first;
210 if (~tmp)
211 goto found_middle;
212 size -= BITS_PER_LONG;
213 result += BITS_PER_LONG;
214 }
215 137
216 while (size & ~(BITS_PER_LONG - 1)) { 138 while (!tmp) {
217 if (~(tmp = *(p++))) 139 start += BITS_PER_LONG;
218 goto found_middle_swap; 140 if (start >= nbits)
219 result += BITS_PER_LONG; 141 return nbits;
220 size -= BITS_PER_LONG; 142
143 tmp = addr[start / BITS_PER_LONG] ^ invert;
221 } 144 }
222 if (!size)
223 return result;
224 tmp = ext2_swabp(p);
225found_first:
226 tmp |= ~0UL << size;
227 if (tmp == ~0UL) /* Are any bits zero? */
228 return result + size; /* Nope. Skip ffz */
229found_middle:
230 return result + ffz(tmp);
231 145
232found_middle_swap: 146 return min(start + __ffs(ext2_swab(tmp)), nbits);
233 return result + ffz(ext2_swab(tmp)); 147}
148#endif
149
150#ifndef find_next_zero_bit_le
151unsigned long find_next_zero_bit_le(const void *addr, unsigned
152 long size, unsigned long offset)
153{
154 return _find_next_bit_le(addr, size, offset, ~0UL);
234} 155}
235EXPORT_SYMBOL(find_next_zero_bit_le); 156EXPORT_SYMBOL(find_next_zero_bit_le);
236#endif 157#endif
@@ -239,45 +160,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
239unsigned long find_next_bit_le(const void *addr, unsigned 160unsigned long find_next_bit_le(const void *addr, unsigned
240 long size, unsigned long offset) 161 long size, unsigned long offset)
241{ 162{
242 const unsigned long *p = addr; 163 return _find_next_bit_le(addr, size, offset, 0UL);
243 unsigned long result = offset & ~(BITS_PER_LONG - 1);
244 unsigned long tmp;
245
246 if (offset >= size)
247 return size;
248 p += BITOP_WORD(offset);
249 size -= result;
250 offset &= (BITS_PER_LONG - 1UL);
251 if (offset) {
252 tmp = ext2_swabp(p++);
253 tmp &= (~0UL << offset);
254 if (size < BITS_PER_LONG)
255 goto found_first;
256 if (tmp)
257 goto found_middle;
258 size -= BITS_PER_LONG;
259 result += BITS_PER_LONG;
260 }
261
262 while (size & ~(BITS_PER_LONG - 1)) {
263 tmp = *(p++);
264 if (tmp)
265 goto found_middle_swap;
266 result += BITS_PER_LONG;
267 size -= BITS_PER_LONG;
268 }
269 if (!size)
270 return result;
271 tmp = ext2_swabp(p);
272found_first:
273 tmp &= (~0UL >> (BITS_PER_LONG - size));
274 if (tmp == 0UL) /* Are any bits set? */
275 return result + size; /* Nope. */
276found_middle:
277 return result + __ffs(tmp);
278
279found_middle_swap:
280 return result + __ffs(ext2_swab(tmp));
281} 164}
282EXPORT_SYMBOL(find_next_bit_le); 165EXPORT_SYMBOL(find_next_bit_le);
283#endif 166#endif