author:    Jens Axboe <axboe@kernel.dk>  2018-11-30 15:18:06 -0500
committer: Jens Axboe <axboe@kernel.dk>  2018-11-30 16:47:45 -0500
commit:    ea86ea2cdced20057da4d2c32965c1219c238197
tree:      08926009b00df1229668f131d41d2b467a78cc87
parent:    531724abc3bfb556c1dd68086cf9cb51f76464e3
sbitmap: amortize cost of clearing bits

sbitmap maintains a set of words that we use to set and clear bits, with
each bit representing a tag for blk-mq. Even though we spread the bits
out and maintain a hint cache, a bit that gets allocated is always
cleared back in that exact same spot.

This introduces batched clearing of bits. Instead of clearing a given
bit immediately, the corresponding bit is set in a per-word cleared/free
mask. If we then fail to allocate a bit from a given word, we check that
mask and batch-move the cleared bits back into the word at that point.
This trades 64 atomic bitops for 2 cmpxchg().

In a threaded poll test case, this change removes half the overhead of
getting and clearing tags. In another poll test case with a single
thread, performance is unchanged.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
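
To see the idea outside the kernel, here is a minimal user-space sketch of the same two-step scheme using C11 atomics. All demo_* names are made up for illustration, and the swap_lock serialization the patch adds around the batch move is omitted; this is a sketch of the technique, not the kernel implementation.

/*
 * Free path parks the bit in ->cleared; the allocation fallback folds
 * every parked clear back into ->word with two CAS loops.
 */
#include <stdatomic.h>
#include <stdbool.h>

struct demo_word {
	_Atomic unsigned long word;	/* 1 = tag in use */
	_Atomic unsigned long cleared;	/* 1 = freed, not yet folded back */
};

/* Free path: one atomic OR into ->cleared, ->word is left untouched. */
static void demo_deferred_clear_bit(struct demo_word *w, unsigned int bit)
{
	atomic_fetch_or(&w->cleared, 1UL << bit);
}

/*
 * Allocation fallback: when ->word looks full, reclaim the parked bits.
 * Returns true if any bits were moved back.
 */
static bool demo_batch_clear(struct demo_word *w)
{
	unsigned long mask, val;

	/* grab a stable snapshot of the cleared mask and zero it */
	mask = atomic_load(&w->cleared);
	while (mask && !atomic_compare_exchange_weak(&w->cleared, &mask, 0UL))
		;	/* mask is refreshed with the current value on failure */
	if (!mask)
		return false;

	/* now drop those bits from the allocation word */
	val = atomic_load(&w->word);
	while (!atomic_compare_exchange_weak(&w->word, &val, val & ~mask))
		;	/* val is refreshed on failure */
	return true;
}
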
-rw-r--r-- | include/linux/sbitmap.h | 33
-rw-r--r-- | lib/sbitmap.c           | 81
2 files changed, 100 insertions, 14 deletions

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 804a50983ec5..81359d45751e 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -30,14 +30,24 @@ struct seq_file;
  */
 struct sbitmap_word {
 	/**
-	 * @word: The bitmap word itself.
+	 * @depth: Number of bits being used in @word/@cleared
 	 */
-	unsigned long word;
+	unsigned long depth;
 
 	/**
-	 * @depth: Number of bits being used in @word.
+	 * @word: word holding free bits
 	 */
-	unsigned long depth;
+	unsigned long word ____cacheline_aligned_in_smp;
+
+	/**
+	 * @cleared: word holding cleared bits
+	 */
+	unsigned long cleared ____cacheline_aligned_in_smp;
+
+	/**
+	 * @swap_lock: Held while swapping word <-> cleared
+	 */
+	spinlock_t swap_lock;
 } ____cacheline_aligned_in_smp;
 
 /**
@@ -310,6 +320,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
 	clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
 }
 
+/*
+ * This one is special, since it doesn't actually clear the bit, rather it
+ * sets the corresponding bit in the ->cleared mask instead. Paired with
+ * the caller doing sbitmap_batch_clear() if a given index is full, which
+ * will clear the previously freed entries in the corresponding ->word.
+ */
+static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
+{
+	unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
+
+	set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
+}
+
 static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
 					    unsigned int bitnr)
 {
@@ -321,8 +344,6 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
 	return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
 }
 
-unsigned int sbitmap_weight(const struct sbitmap *sb);
-
 /**
  * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
  * @sb: Bitmap to show.
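
The new layout puts ->word and ->cleared on separate cache lines (both are ____cacheline_aligned_in_smp), so the allocation path and the deferred-free path no longer bounce the same line. As for addressing, a tag splits into a word index and a bit offset; the toy program below only illustrates the usual shift-based mapping that SB_NR_TO_INDEX()/SB_NR_TO_BIT() are built on (those helpers live elsewhere in sbitmap.h and are not part of this diff).

#include <stdio.h>

int main(void)
{
	unsigned int shift = 6;				/* 64 bits per word */
	unsigned int tag   = 200;			/* example tag number */
	unsigned int index = tag >> shift;		/* which sbitmap_word */
	unsigned int bit   = tag & ((1U << shift) - 1);	/* bit inside that word */

	/* prints: tag 200 -> word 3, bit 8 */
	printf("tag %u -> word %u, bit %u\n", tag, index, bit);
	return 0;
}
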
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 45cab6bbc1c7..f99382e59314 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -59,6 +59,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 	for (i = 0; i < sb->map_nr; i++) {
 		sb->map[i].depth = min(depth, bits_per_word);
 		depth -= sb->map[i].depth;
+		spin_lock_init(&sb->map[i].swap_lock);
 	}
 	return 0;
 }
@@ -111,6 +112,57 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
 	return nr;
 }
 
+/*
+ * See if we have deferred clears that we can batch move
+ */
+static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
+{
+	unsigned long mask, val;
+	bool ret = false;
+
+	spin_lock(&sb->map[index].swap_lock);
+
+	if (!sb->map[index].cleared)
+		goto out_unlock;
+
+	/*
+	 * First get a stable cleared mask, setting the old mask to 0.
+	 */
+	do {
+		mask = sb->map[index].cleared;
+	} while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);
+
+	/*
+	 * Now clear the masked bits in our free word
+	 */
+	do {
+		val = sb->map[index].word;
+	} while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
+
+	ret = true;
+out_unlock:
+	spin_unlock(&sb->map[index].swap_lock);
+	return ret;
+}
+
+static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
+				     unsigned int alloc_hint, bool round_robin)
+{
+	int nr;
+
+	do {
+		nr = __sbitmap_get_word(&sb->map[index].word,
+					sb->map[index].depth, alloc_hint,
+					!round_robin);
+		if (nr != -1)
+			break;
+		if (!sbitmap_deferred_clear(sb, index))
+			break;
+	} while (1);
+
+	return nr;
+}
+
 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
 {
 	unsigned int i, index;
@@ -129,9 +181,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
 		alloc_hint = 0;
 
 	for (i = 0; i < sb->map_nr; i++) {
-		nr = __sbitmap_get_word(&sb->map[index].word,
-					sb->map[index].depth, alloc_hint,
-					!round_robin);
+		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
+					       round_robin);
 		if (nr != -1) {
 			nr += index << sb->shift;
 			break;
@@ -206,23 +257,36 @@ bool sbitmap_any_bit_clear(const struct sbitmap *sb)
 }
 EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
 
-unsigned int sbitmap_weight(const struct sbitmap *sb)
+static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
 {
 	unsigned int i, weight = 0;
 
 	for (i = 0; i < sb->map_nr; i++) {
 		const struct sbitmap_word *word = &sb->map[i];
 
-		weight += bitmap_weight(&word->word, word->depth);
+		if (set)
+			weight += bitmap_weight(&word->word, word->depth);
+		else
+			weight += bitmap_weight(&word->cleared, word->depth);
 	}
 	return weight;
 }
-EXPORT_SYMBOL_GPL(sbitmap_weight);
+
+static unsigned int sbitmap_weight(const struct sbitmap *sb)
+{
+	return __sbitmap_weight(sb, true);
+}
+
+static unsigned int sbitmap_cleared(const struct sbitmap *sb)
+{
+	return __sbitmap_weight(sb, false);
+}
 
 void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
 {
 	seq_printf(m, "depth=%u\n", sb->depth);
-	seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
+	seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
+	seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
 	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
 	seq_printf(m, "map_nr=%u\n", sb->map_nr);
 }
@@ -514,7 +578,8 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
-	sbitmap_clear_bit_unlock(&sbq->sb, nr);
+	sbitmap_deferred_clear_bit(&sbq->sb, nr);
+
 	/*
 	 * Pairs with the memory barrier in set_current_state() to ensure the
 	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
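
Why sbitmap_show() now subtracts the cleared count: a deferred-freed bit stays set in ->word until the next batch move, so the raw weight over-reports busy tags. A toy snapshot of a single word illustrates the arithmetic (plain C with the GCC popcount builtin, not kernel code).

#include <stdio.h>

int main(void)
{
	unsigned long word    = 0xffUL;	/* 8 bits set in ->word            */
	unsigned long cleared = 0x05UL;	/* 2 of them already freed, parked */

	/* busy = weight(word) - weight(cleared) = 8 - 2 = 6 */
	printf("busy=%d\n",
	       __builtin_popcountl(word) - __builtin_popcountl(cleared));
	printf("cleared=%d\n", __builtin_popcountl(cleared));	/* 2 */
	return 0;
}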