author    Ming Lei <ming.lei@redhat.com>    2018-05-24 13:00:39 -0400
committer Jens Axboe <axboe@kernel.dk>      2018-05-24 13:00:39 -0400
commit    e6fc46498784e799d3eb95d83079180e413c4e7d (patch)
tree      63876dd6517d7d170d90d71441392ebdcdff3ea6
parent    f183464684190bacbfb14623bd3e4e51b7575b4c (diff)
blk-mq: avoid starving tag allocation after allocating process migrates
When the allocating process is scheduled back and the mapped hw queue has changed, fake one extra wake up on the previous queue to compensate for the missed wake up, so that other allocations on the previous queue won't be starved.

This patch fixes a request allocation hang which is easy to trigger when nr_requests is very low. The race is as follows:

1) 2 hw queues, nr_requests is 2, and wake_batch is 1

2) there are 3 waiters on hw queue 0

3) two in-flight requests in hw queue 0 are completed, and only two of the 3 waiters are woken up because of wake_batch, but both woken waiters can be scheduled to another CPU and end up switching to hw queue 1

4) the 3rd waiter then waits forever, since no in-flight request remains in hw queue 0

5) this patch fixes it with a fake wakeup when a waiter is scheduled to another hw queue

Cc: <stable@vger.kernel.org>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>

Modified commit message to make it clearer, and made it apply on top of the 4.18 branch.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
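To make the batched-wakeup arithmetic of the race concrete, here is a minimal userspace C sketch of the accounting. It is an illustrative model only: struct sim_queue, sim_wake_up() and the numbers are invented, not the kernel's sbitmap implementation.

#include <stdio.h>

/* Toy model of one hw queue's batched wakeup accounting. */
struct sim_queue {
	int wait_cnt;	/* counts down toward the next batch of wakeups */
	int wake_batch;	/* waiters are only woken every wake_batch frees */
	int waiters;	/* tasks currently sleeping on this queue */
};

/* Rough stand-in for sbitmap_queue_wake_up(): account one "free". */
static void sim_wake_up(struct sim_queue *q, const char *reason)
{
	if (--q->wait_cnt > 0)
		return;			/* batch not complete, nobody woken */
	q->wait_cnt = q->wake_batch;	/* start the next batch */
	if (q->waiters > 0) {
		q->waiters--;
		printf("%s: woke 1 waiter, %d still asleep\n",
		       reason, q->waiters);
	}
}

int main(void)
{
	/* Steps 1)-2): wake_batch is 1, three waiters on hw queue 0. */
	struct sim_queue hctx0 = { .wait_cnt = 1, .wake_batch = 1, .waiters = 3 };

	/* Step 3): the only two in-flight requests complete. */
	sim_wake_up(&hctx0, "completion");
	sim_wake_up(&hctx0, "completion");

	/*
	 * Both woken tasks migrate to hw queue 1, so no further completion
	 * will ever arrive on hw queue 0; without compensation the third
	 * waiter sleeps forever. Step 5): each migrating task fakes one
	 * extra wake up on the queue it left.
	 */
	sim_wake_up(&hctx0, "fake wakeup after migration");
	sim_wake_up(&hctx0, "fake wakeup after migration");

	printf("waiters left asleep on hw queue 0: %d\n", hctx0.waiters);
	return 0;
}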
-rw-r--r--  block/blk-mq-tag.c        12
-rw-r--r--  include/linux/sbitmap.h    7
-rw-r--r--  lib/sbitmap.c             29
3 files changed, 34 insertions(+), 14 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 336dde07b230..a4e58fc28a06 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -134,6 +134,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	ws = bt_wait_ptr(bt, data->hctx);
 	drop_ctx = data->ctx == NULL;
 	do {
+		struct sbitmap_queue *bt_prev;
+
 		/*
 		 * We're out of tags on this hardware queue, kick any
 		 * pending IO submits before going to sleep waiting for
@@ -159,6 +161,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		if (data->ctx)
 			blk_mq_put_ctx(data->ctx);
 
+		bt_prev = bt;
 		io_schedule();
 
 		data->ctx = blk_mq_get_ctx(data->q);
@@ -170,6 +173,15 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 			bt = &tags->bitmap_tags;
 
 		finish_wait(&ws->wait, &wait);
+
+		/*
+		 * If destination hw queue is changed, fake wake up on
+		 * previous queue for compensating the wake up miss, so
+		 * other allocations on previous queue won't be starved.
+		 */
+		if (bt != bt_prev)
+			sbitmap_queue_wake_up(bt_prev);
+
 		ws = bt_wait_ptr(bt, data->hctx);
 	} while (1);
 
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 0c4a9c242dd7..e6539536dea9 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -513,6 +513,13 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
 
 /**
+ * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue
+ * on a &struct sbitmap_queue.
+ * @sbq: Bitmap queue to wake up.
+ */
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
+
+/**
  * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
  * seq_file.
  * @sbq: Bitmap queue to show.
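The declaration above is the entire public surface of the new helper. As a hedged sketch of how a caller other than blk-mq might use it, following the same "waiter moved to a different queue" pattern (migrate_waiter() and both queue names are hypothetical; only the sbitmap call is real):

#include <linux/sbitmap.h>

/*
 * Illustrative only: a task that was counted as a waiter on old_sbq but
 * will now wait on new_sbq fakes one wake up on the queue it abandoned,
 * so the batched wake accounting there cannot strand the remaining
 * waiters.
 */
static void migrate_waiter(struct sbitmap_queue *old_sbq,
			   struct sbitmap_queue *new_sbq)
{
	if (old_sbq != new_sbq)
		sbitmap_queue_wake_up(old_sbq);
}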
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6d7d610778d..6fdc6267f4a8 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -352,8 +352,9 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
 	if (sbq->wake_batch != wake_batch) {
 		WRITE_ONCE(sbq->wake_batch, wake_batch);
 		/*
-		 * Pairs with the memory barrier in sbq_wake_up() to ensure that
-		 * the batch size is updated before the wait counts.
+		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
+		 * to ensure that the batch size is updated before the wait
+		 * counts.
 		 */
 		smp_mb__before_atomic();
 		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
@@ -463,15 +464,6 @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq)
 	unsigned int wake_batch;
 	int wait_cnt;
 
-	/*
-	 * Pairs with the memory barrier in set_current_state() to ensure the
-	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
-	 * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
-	 * waiter. See the comment on waitqueue_active(). This is __after_atomic
-	 * because we just did clear_bit_unlock() in the caller.
-	 */
-	smp_mb__after_atomic();
-
 	ws = sbq_wake_ptr(sbq);
 	if (!ws)
 		return false;
@@ -507,17 +499,26 @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq)
 	return false;
 }
 
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
 {
 	while (__sbq_wake_up(sbq))
 		;
 }
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
 	sbitmap_clear_bit_unlock(&sbq->sb, nr);
-	sbq_wake_up(sbq);
+	/*
+	 * Pairs with the memory barrier in set_current_state() to ensure the
+	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+	 * waiter. See the comment on waitqueue_active().
+	 */
+	smp_mb__after_atomic();
+	sbitmap_queue_wake_up(sbq);
+
 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
 }
@@ -529,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 
 	/*
 	 * Pairs with the memory barrier in set_current_state() like in
-	 * sbq_wake_up().
+	 * sbitmap_queue_wake_up().
 	 */
 	smp_mb();
 	wake_index = atomic_read(&sbq->wake_index);
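The comments in this file keep referring to a pairing with the barrier implied by set_current_state(). A rough userspace analogue of that waker/waiter pattern, written with C11 atomics (illustrative only; every name below is invented and this is not kernel code):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool resource_free;	/* plays the role of the cleared bit */
static atomic_int  sleepers;		/* plays the role of waitqueue_active() */

/* Waker: publish the freed resource before checking for sleepers. */
static void waker(void)
{
	atomic_store_explicit(&resource_free, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* ~ smp_mb__after_atomic() */
	if (atomic_load_explicit(&sleepers, memory_order_relaxed) > 0) {
		/* a real implementation would issue the wake up here */
	}
}

/* Waiter: register as asleep before re-checking the condition. */
static bool waiter_should_sleep(void)
{
	atomic_fetch_add(&sleepers, 1);			/* ~ prepare_to_wait() */
	atomic_thread_fence(memory_order_seq_cst);	/* ~ set_current_state() barrier */
	return !atomic_load_explicit(&resource_free, memory_order_relaxed);
}

int main(void)
{
	if (!waiter_should_sleep())
		return 0;	/* condition already true, no sleep needed */
	waker();		/* would run on another CPU in practice */
	return 0;
}

Either the waker sees the sleeper and wakes it, or the sleeper sees the freed resource and never sleeps; the two fences close the lost-wakeup window. That is the guarantee sbitmap_queue_clear() preserves by issuing smp_mb__after_atomic() before calling sbitmap_queue_wake_up().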