Diffstat (limited to 'lib/sbitmap.c')
 -rw-r--r--  lib/sbitmap.c  113
 1 file changed, 81 insertions, 32 deletions
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6a9c06ec70c..6fdc6267f4a8 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
 }
 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
 
-static unsigned int sbq_calc_wake_batch(unsigned int depth)
+static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
+					unsigned int depth)
 {
 	unsigned int wake_batch;
+	unsigned int shallow_depth;
 
 	/*
 	 * For each batch, we wake up one queue. We need to make sure that our
-	 * batch size is small enough that the full depth of the bitmap is
-	 * enough to wake up all of the queues.
+	 * batch size is small enough that the full depth of the bitmap,
+	 * potentially limited by a shallow depth, is enough to wake up all of
+	 * the queues.
+	 *
+	 * Each full word of the bitmap has bits_per_word bits, and there might
+	 * be a partial word. There are depth / bits_per_word full words and
+	 * depth % bits_per_word bits left over. In bitwise arithmetic:
+	 *
+	 * bits_per_word = 1 << shift
+	 * depth / bits_per_word = depth >> shift
+	 * depth % bits_per_word = depth & ((1 << shift) - 1)
+	 *
+	 * Each word can be limited to sbq->min_shallow_depth bits.
 	 */
-	wake_batch = SBQ_WAKE_BATCH;
-	if (wake_batch > depth / SBQ_WAIT_QUEUES)
-		wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
+	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
+	depth = ((depth >> sbq->sb.shift) * shallow_depth +
+		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
+	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
+			     SBQ_WAKE_BATCH);
 
 	return wake_batch;
 }
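As a worked example of the arithmetic in the new comment (illustrative only, not part of the patch), the sketch below re-implements the calculation as a standalone userspace program. The helper min_u() stands in for the kernel's min(), and the values of SBQ_WAIT_QUEUES and SBQ_WAKE_BATCH (both 8) are an assumption based on include/linux/sbitmap.h.

	#include <stdio.h>

	#define SBQ_WAIT_QUEUES	8	/* assumed value from include/linux/sbitmap.h */
	#define SBQ_WAKE_BATCH	8	/* assumed value from include/linux/sbitmap.h */

	static unsigned int min_u(unsigned int a, unsigned int b)
	{
		return a < b ? a : b;
	}

	/* Mirrors the new sbq_calc_wake_batch() arithmetic. */
	static unsigned int calc_wake_batch(unsigned int depth, unsigned int shift,
					    unsigned int min_shallow_depth)
	{
		unsigned int shallow_depth = min_u(1U << shift, min_shallow_depth);
		unsigned int batch;

		/* Full words contribute shallow_depth bits each; the partial word is capped too. */
		depth = (depth >> shift) * shallow_depth +
			min_u(depth & ((1U << shift) - 1), shallow_depth);

		batch = depth / SBQ_WAIT_QUEUES;
		if (batch < 1)
			batch = 1;
		if (batch > SBQ_WAKE_BATCH)
			batch = SBQ_WAKE_BATCH;
		return batch;
	}

	int main(void)
	{
		/*
		 * depth = 256, shift = 6 (64 bits per word), shallow limit = 4:
		 * only 4 * 4 = 16 bits are usable, so the batch becomes
		 * 16 / 8 = 2 instead of being clamped at 8.
		 */
		printf("wake_batch = %u\n", calc_wake_batch(256, 6, 4));
		return 0;
	}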
@@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 		*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
 	}
 
-	sbq->wake_batch = sbq_calc_wake_batch(depth);
+	sbq->min_shallow_depth = UINT_MAX;
+	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
 
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
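Because sbitmap_queue_init_node() now sets min_shallow_depth to UINT_MAX before computing the batch, users that never declare a shallow limit keep the old behaviour. A quick check of the arithmetic (an observation, not part of the patch), noting that the per-word remainder is always below 1 << shift:

	shallow_depth = min(1U << shift, UINT_MAX)                    = 1 << shift
	depth         = (depth >> shift) * (1 << shift)
	                + min(depth & ((1 << shift) - 1), 1 << shift) = depth
	wake_batch    = clamp(depth / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH)

which is exactly the clamp the old SBQ_WAKE_BATCH code performed.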
@@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
-void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+					    unsigned int depth)
 {
-	unsigned int wake_batch = sbq_calc_wake_batch(depth);
+	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
 	int i;
 
 	if (sbq->wake_batch != wake_batch) {
 		WRITE_ONCE(sbq->wake_batch, wake_batch);
 		/*
-		 * Pairs with the memory barrier in sbq_wake_up() to ensure that
-		 * the batch size is updated before the wait counts.
+		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
+		 * to ensure that the batch size is updated before the wait
+		 * counts.
 		 */
 		smp_mb__before_atomic();
 		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
 			atomic_set(&sbq->ws[i].wait_cnt, 1);
 	}
+}
+
+void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+{
+	sbitmap_queue_update_wake_batch(sbq, depth);
 	sbitmap_resize(&sbq->sb, depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 	unsigned int hint, depth;
 	int nr;
 
+	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
+
 	hint = this_cpu_read(*sbq->alloc_hint);
 	depth = READ_ONCE(sbq->sb.depth);
 	if (unlikely(hint >= depth)) {
@@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
 
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+				     unsigned int min_shallow_depth)
+{
+	sbq->min_shallow_depth = min_shallow_depth;
+	sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
+
 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
 	int i, wake_index;
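A hypothetical caller sketch for the new API (the helper names, the EXAMPLE_SHALLOW_DEPTH value, and the exact sbitmap_queue_init_node() parameters are illustrative assumptions, not from the patch): a user that allocates with a bounded shallow depth announces the smallest value it will ever pass, so the wake batch accounts for the reduced effective depth and the new WARN_ON_ONCE() in __sbitmap_queue_get_shallow() is never tripped.

	#include <linux/sbitmap.h>

	#define EXAMPLE_SHALLOW_DEPTH	4	/* illustrative per-word limit */

	static int example_init(struct sbitmap_queue *sbq, unsigned int depth,
				gfp_t flags, int node)
	{
		int ret;

		/* shift = -1 lets sbitmap pick the word size; no round-robin. */
		ret = sbitmap_queue_init_node(sbq, depth, -1, false, flags, node);
		if (ret)
			return ret;

		/* Must not exceed any shallow_depth passed to the get path below. */
		sbitmap_queue_min_shallow_depth(sbq, EXAMPLE_SHALLOW_DEPTH);
		return 0;
	}

	static int example_get(struct sbitmap_queue *sbq)
	{
		/* Never smaller than the declared minimum shallow depth. */
		return __sbitmap_queue_get_shallow(sbq, EXAMPLE_SHALLOW_DEPTH);
	}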
@@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 	return NULL;
 }
 
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq)
 {
 	struct sbq_wait_state *ws;
 	unsigned int wake_batch;
 	int wait_cnt;
 
-	/*
-	 * Pairs with the memory barrier in set_current_state() to ensure the
-	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
-	 * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
-	 * waiter. See the comment on waitqueue_active(). This is __after_atomic
-	 * because we just did clear_bit_unlock() in the caller.
-	 */
-	smp_mb__after_atomic();
-
 	ws = sbq_wake_ptr(sbq);
 	if (!ws)
-		return;
+		return false;
 
 	wait_cnt = atomic_dec_return(&ws->wait_cnt);
 	if (wait_cnt <= 0) {
+		int ret;
+
 		wake_batch = READ_ONCE(sbq->wake_batch);
+
 		/*
 		 * Pairs with the memory barrier in sbitmap_queue_resize() to
 		 * ensure that we see the batch size update before the wait
 		 * count is reset.
 		 */
 		smp_mb__before_atomic();
+
 		/*
-		 * If there are concurrent callers to sbq_wake_up(), the last
-		 * one to decrement the wait count below zero will bump it back
-		 * up. If there is a concurrent resize, the count reset will
-		 * either cause the cmpxchg to fail or overwrite after the
-		 * cmpxchg.
+		 * For concurrent callers of this, the one that failed the
+		 * atomic_cmpxchg() race should call this function again
+		 * to wake up a new batch on a different 'ws'.
 		 */
-		atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
-		sbq_index_atomic_inc(&sbq->wake_index);
-		wake_up_nr(&ws->wait, wake_batch);
+		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
+		if (ret == wait_cnt) {
+			sbq_index_atomic_inc(&sbq->wake_index);
+			wake_up_nr(&ws->wait, wake_batch);
+			return false;
+		}
+
+		return true;
 	}
+
+	return false;
+}
+
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
+{
+	while (__sbq_wake_up(sbq))
+		;
 }
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
 	sbitmap_clear_bit_unlock(&sbq->sb, nr);
-	sbq_wake_up(sbq);
+	/*
+	 * Pairs with the memory barrier in set_current_state() to ensure the
+	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+	 * waiter. See the comment on waitqueue_active().
+	 */
+	smp_mb__after_atomic();
+	sbitmap_queue_wake_up(sbq);
+
 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
 }
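The retry contract between __sbq_wake_up() and the sbitmap_queue_wake_up() loop boils down to a compare-and-swap where the loser simply tries again on the next wait queue. A minimal userspace sketch of that idiom using C11 atomics (illustrative only; the function and parameter names are made up):

	#include <stdatomic.h>
	#include <stdbool.h>

	/*
	 * Of any number of concurrent callers that observed the same wait_cnt
	 * value at or below zero, exactly one wins the compare-and-swap and
	 * resets the counter to wake_batch; the rest are told to retry, which
	 * in sbitmap terms means moving on to a different 'ws'.
	 */
	static bool reset_wait_cnt_or_retry(atomic_int *wait_cnt, int observed,
					    int wake_batch)
	{
		int expected = observed;

		if (atomic_compare_exchange_strong(wait_cnt, &expected, wake_batch))
			return false;	/* won the race: batch woken, no retry needed */

		return true;		/* lost the race: caller should try again */
	}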
@@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 
 	/*
 	 * Pairs with the memory barrier in set_current_state() like in
-	 * sbq_wake_up().
+	 * sbitmap_queue_wake_up().
 	 */
 	smp_mb();
 	wake_index = atomic_read(&sbq->wake_index);
@@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 	seq_puts(m, "}\n");
 
 	seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
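For a queue that never set a shallow limit (min_shallow_depth left at its UINT_MAX default), the tail of the sbitmap_queue_show() debugfs output would now end with something like the lines below; the surrounding fields are omitted and the round_robin value is just an example.

	round_robin=0
	min_shallow_depth=4294967295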
