diff options
author | Alexander Gordeev <agordeev@redhat.com> | 2014-06-12 11:05:37 -0400 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2014-06-18 01:13:05 -0400 |
commit | 2971c35f35886b87af54675313a2afef937c1b0c (patch) | |
tree | 3a812e2286298ca05f764e93c777a80adc310d40 | |
parent | 8537b12034cf1fd3fab3da2c859d71f76846fae9 (diff) |
blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt
This piece of code in the bt_clear_tag() function is racy:
bs = bt_wake_ptr(bt);
if (bs && atomic_dec_and_test(&bs->wait_cnt)) {
atomic_set(&bs->wait_cnt, bt->wake_cnt);
wake_up(&bs->wait);
}
Since nothing prevents bt_wake_ptr() from returning the very
same 'bs' address on multiple CPUs, the following scenario is
possible:
CPU1 CPU2
---- ----
0. bs = bt_wake_ptr(bt); bs = bt_wake_ptr(bt);
1. atomic_dec_and_test(&bs->wait_cnt)
2. atomic_dec_and_test(&bs->wait_cnt)
3. atomic_set(&bs->wait_cnt, bt->wake_cnt);
If the decrement in [1] yields zero then for some amount of time
the decrement in [2] results in a negative/overflow value, which
is not expected. The follow-up assignment in [3] overwrites the
invalid value with the batch value (and likely prevents the issue
from being severe), which is still incorrect and should be fixed.
Cc: Ming Lei <tom.leiming@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- | block/blk-mq-tag.c | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 6deb13055490..08fc6716d362 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c
@@ -344,6 +344,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) | |||
344 | { | 344 | { |
345 | const int index = TAG_TO_INDEX(bt, tag); | 345 | const int index = TAG_TO_INDEX(bt, tag); |
346 | struct bt_wait_state *bs; | 346 | struct bt_wait_state *bs; |
347 | int wait_cnt; | ||
347 | 348 | ||
348 | /* | 349 | /* |
349 | * The unlock memory barrier need to order access to req in free | 350 | * The unlock memory barrier need to order access to req in free |
@@ -352,10 +353,19 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) | |||
352 | clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); | 353 | clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); |
353 | 354 | ||
354 | bs = bt_wake_ptr(bt); | 355 | bs = bt_wake_ptr(bt); |
355 | if (bs && atomic_dec_and_test(&bs->wait_cnt)) { | 356 | if (!bs) |
356 | atomic_set(&bs->wait_cnt, bt->wake_cnt); | 357 | return; |
358 | |||
359 | wait_cnt = atomic_dec_return(&bs->wait_cnt); | ||
360 | if (wait_cnt == 0) { | ||
361 | wake: | ||
362 | atomic_add(bt->wake_cnt, &bs->wait_cnt); | ||
357 | bt_index_atomic_inc(&bt->wake_index); | 363 | bt_index_atomic_inc(&bt->wake_index); |
358 | wake_up(&bs->wait); | 364 | wake_up(&bs->wait); |
365 | } else if (wait_cnt < 0) { | ||
366 | wait_cnt = atomic_inc_return(&bs->wait_cnt); | ||
367 | if (!wait_cnt) | ||
368 | goto wake; | ||
359 | } | 369 | } |
360 | } | 370 | } |
361 | 371 | ||