Diffstat (limited to 'lib/sbitmap.c')
 lib/sbitmap.c | 113
 1 file changed, 81 insertions(+), 32 deletions(-)
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6a9c06ec70c..6fdc6267f4a8 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
 }
 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
 
-static unsigned int sbq_calc_wake_batch(unsigned int depth)
+static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
+					unsigned int depth)
 {
 	unsigned int wake_batch;
+	unsigned int shallow_depth;
 
 	/*
 	 * For each batch, we wake up one queue. We need to make sure that our
-	 * batch size is small enough that the full depth of the bitmap is
-	 * enough to wake up all of the queues.
+	 * batch size is small enough that the full depth of the bitmap,
+	 * potentially limited by a shallow depth, is enough to wake up all of
+	 * the queues.
+	 *
+	 * Each full word of the bitmap has bits_per_word bits, and there might
+	 * be a partial word. There are depth / bits_per_word full words and
+	 * depth % bits_per_word bits left over. In bitwise arithmetic:
+	 *
+	 * bits_per_word = 1 << shift
+	 * depth / bits_per_word = depth >> shift
+	 * depth % bits_per_word = depth & ((1 << shift) - 1)
+	 *
+	 * Each word can be limited to sbq->min_shallow_depth bits.
 	 */
-	wake_batch = SBQ_WAKE_BATCH;
-	if (wake_batch > depth / SBQ_WAIT_QUEUES)
-		wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
+	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
+	depth = ((depth >> sbq->sb.shift) * shallow_depth +
+		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
+	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
+			     SBQ_WAKE_BATCH);
 
 	return wake_batch;
 }
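
The arithmetic in the new sbq_calc_wake_batch() is worth a worked example. The sketch below is not part of the patch; it mirrors the computation in plain userspace C. SBQ_WAIT_QUEUES and SBQ_WAKE_BATCH are both 8 in include/linux/sbitmap.h; the shift, depth and min_shallow_depth values here are made up for illustration. With 64-bit words (shift = 6), depth = 140 and min_shallow_depth = 16, the usable depth is 2 * 16 + min(12, 16) = 44, so the batch shrinks from 8 to 44 / 8 = 5 and one batch per wait queue (8 * 5 = 40) still fits in the shallow-limited depth.

/*
 * Standalone sketch (not kernel code) of the new wake_batch math.
 * The example arguments in main() are invented for illustration.
 */
#include <stdio.h>

#define SBQ_WAIT_QUEUES	8
#define SBQ_WAKE_BATCH	8

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

static unsigned int calc_wake_batch(unsigned int shift, unsigned int depth,
				    unsigned int min_shallow_depth)
{
	/* Usable bits per word, limited by the shallow allocation depth. */
	unsigned int shallow_depth = min_u(1U << shift, min_shallow_depth);
	unsigned int batch;

	/* Full words contribute shallow_depth bits each; the partial word
	 * contributes whatever it has, capped at shallow_depth. */
	depth = (depth >> shift) * shallow_depth +
		min_u(depth & ((1U << shift) - 1), shallow_depth);

	/* One wakeup batch per wait queue must fit in the usable depth. */
	batch = depth / SBQ_WAIT_QUEUES;
	if (batch < 1)
		batch = 1;
	if (batch > SBQ_WAKE_BATCH)
		batch = SBQ_WAKE_BATCH;
	return batch;
}

int main(void)
{
	/* shift=6, depth=140, min_shallow_depth=16 -> usable depth 44 -> 5 */
	printf("%u\n", calc_wake_batch(6, 140, 16));
	return 0;
}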
@@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
 	}
 
-	sbq->wake_batch = sbq_calc_wake_batch(depth);
+	sbq->min_shallow_depth = UINT_MAX;
+	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
 
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
@@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
-void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+					    unsigned int depth)
 {
-	unsigned int wake_batch = sbq_calc_wake_batch(depth);
+	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
 	int i;
 
 	if (sbq->wake_batch != wake_batch) {
 		WRITE_ONCE(sbq->wake_batch, wake_batch);
 		/*
-		 * Pairs with the memory barrier in sbq_wake_up() to ensure that
-		 * the batch size is updated before the wait counts.
+		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
+		 * to ensure that the batch size is updated before the wait
+		 * counts.
 		 */
 		smp_mb__before_atomic();
 		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
 			atomic_set(&sbq->ws[i].wait_cnt, 1);
 	}
+}
+
+void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+{
+	sbitmap_queue_update_wake_batch(sbq, depth);
 	sbitmap_resize(&sbq->sb, depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 	unsigned int hint, depth;
 	int nr;
 
+	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
+
 	hint = this_cpu_read(*sbq->alloc_hint);
 	depth = READ_ONCE(sbq->sb.depth);
 	if (unlikely(hint >= depth)) {
@@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
 
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+				     unsigned int min_shallow_depth)
+{
+	sbq->min_shallow_depth = min_shallow_depth;
+	sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
+
 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
 	int i, wake_index;
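
The calling convention implied by the new export and by the WARN_ON_ONCE() added to __sbitmap_queue_get_shallow() is that a user announces the smallest shallow depth it will ever pass before it starts allocating, so the wake batch is sized for it. A hypothetical caller fragment (my_sbq and MY_ASYNC_DEPTH are made-up names, not from the patch):

	#define MY_ASYNC_DEPTH	64

	/* Once, after sbitmap_queue_init_node() succeeds: announce the
	 * minimum shallow depth this user will ever request. */
	sbitmap_queue_min_shallow_depth(&my_sbq, MY_ASYNC_DEPTH);

	/* Later, in the allocation path; passing anything smaller than
	 * the announced minimum now trips the WARN_ON_ONCE() above. */
	nr = __sbitmap_queue_get_shallow(&my_sbq, MY_ASYNC_DEPTH);
	if (nr < 0) {
		/* No bit free at this shallow depth: wait or fall back. */
	}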
@@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 	return NULL;
 }
 
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq)
 {
 	struct sbq_wait_state *ws;
 	unsigned int wake_batch;
 	int wait_cnt;
 
-	/*
-	 * Pairs with the memory barrier in set_current_state() to ensure the
-	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
-	 * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
-	 * waiter. See the comment on waitqueue_active(). This is __after_atomic
-	 * because we just did clear_bit_unlock() in the caller.
-	 */
-	smp_mb__after_atomic();
-
 	ws = sbq_wake_ptr(sbq);
 	if (!ws)
-		return;
+		return false;
 
 	wait_cnt = atomic_dec_return(&ws->wait_cnt);
 	if (wait_cnt <= 0) {
+		int ret;
+
 		wake_batch = READ_ONCE(sbq->wake_batch);
+
 		/*
 		 * Pairs with the memory barrier in sbitmap_queue_resize() to
 		 * ensure that we see the batch size update before the wait
 		 * count is reset.
 		 */
 		smp_mb__before_atomic();
+
 		/*
-		 * If there are concurrent callers to sbq_wake_up(), the last
-		 * one to decrement the wait count below zero will bump it back
-		 * up. If there is a concurrent resize, the count reset will
-		 * either cause the cmpxchg to fail or overwrite after the
-		 * cmpxchg.
+		 * For concurrent callers of this, the one that failed the
+		 * atomic_cmpxhcg() race should call this function again
+		 * to wakeup a new batch on a different 'ws'.
 		 */
-		atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
-		sbq_index_atomic_inc(&sbq->wake_index);
-		wake_up_nr(&ws->wait, wake_batch);
+		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
+		if (ret == wait_cnt) {
+			sbq_index_atomic_inc(&sbq->wake_index);
+			wake_up_nr(&ws->wait, wake_batch);
+			return false;
+		}
+
+		return true;
 	}
+
+	return false;
+}
+
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
+{
+	while (__sbq_wake_up(sbq))
+		;
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
 	sbitmap_clear_bit_unlock(&sbq->sb, nr);
-	sbq_wake_up(sbq);
+	/*
+	 * Pairs with the memory barrier in set_current_state() to ensure the
+	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+	 * waiter. See the comment on waitqueue_active().
+	 */
+	smp_mb__after_atomic();
+	sbitmap_queue_wake_up(sbq);
+
 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
 }
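
The core of the rework above is that only the waker that wins the cmpxchg resets the count and issues the wakeups; a loser reports failure so the caller can retry (in sbitmap against a different wait queue), instead of the lost decrement being silently absorbed. The sketch below is not kernel code: it reproduces the same decrement/cmpxchg-reset/retry pattern with C11 atomics and a single counter, purely to illustrate the control flow.

/*
 * Standalone sketch (C11 atomics) of the retry pattern in __sbq_wake_up().
 * A single global counter stands in for one ws->wait_cnt.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int wait_cnt = 4;		/* stands in for ws->wait_cnt */
static const int wake_batch = 4;	/* stands in for sbq->wake_batch */

static bool wake_up_batch(void)
{
	/* Mimics atomic_dec_return(): subtract one, use the new value. */
	int cnt = atomic_fetch_sub(&wait_cnt, 1) - 1;

	if (cnt <= 0) {
		int expected = cnt;

		/* Only the winner of the cmpxchg resets the counter and
		 * wakes a batch; a loser returns true so the caller
		 * retries (in sbitmap, against another wait queue). */
		if (atomic_compare_exchange_strong(&wait_cnt, &expected,
						   wake_batch)) {
			printf("woke a batch of %d waiters\n", wake_batch);
			return false;
		}
		return true;
	}
	return false;
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		while (wake_up_batch())
			;
	return 0;
}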
@@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 
 	/*
 	 * Pairs with the memory barrier in set_current_state() like in
-	 * sbq_wake_up().
+	 * sbitmap_queue_wake_up().
 	 */
 	smp_mb();
 	wake_index = atomic_read(&sbq->wake_index);
@@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 	seq_puts(m, "}\n");
 
 	seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);