author     Jens Axboe <axboe@fb.com>  2015-01-14 10:49:55 -0500
committer  Jens Axboe <axboe@fb.com>  2015-01-14 10:49:55 -0500
commit     0bf364984c4a799f75414de009ecd579d6d35a21
tree       639f8d48aa1425843523ac6ace08cfd821eb9446 /block
parent     dd22f551ac0ad366f92f601835f6623b83adc331
blk-mq: fix false negative out-of-tags condition
The blk-mq tagging tries to maintain some locality between CPUs and
the tags issued. The tags are split into groups of words, and the
words may not be fully populated. When searching for a new free tag,
blk-mq may look at partial words, hence it passes in an offset/size
to find_next_zero_bit(). However, it does that wrong: the size must
always be the full length of the number of tags in that word,
otherwise we'll potentially miss some free tags near the end.
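For reference, find_next_zero_bit(addr, size, offset) treats 'size'
as the absolute search limit in bits, not as a count relative to
'offset', so any limit short of the full word depth hides free bits
near the end. A minimal userspace sketch of that contract (the helper
below is a simplified stand-in, not the kernel's real implementation):

#include <stdio.h>

/* Simplified stand-in for the kernel helper: return the index of the
 * first zero bit in [offset, size), or 'size' if every bit is set. */
static unsigned int find_next_zero_bit(const unsigned long *addr,
				       unsigned int size, unsigned int offset)
{
	for (; offset < size; offset++)
		if (!(*addr & (1UL << offset)))
			return offset;
	return size;
}

int main(void)
{
	unsigned long word = 0xbf;	/* depth 8, only bit 6 is free */
	unsigned int depth = 8;

	/* Truncated size: bit 6 is never examined, so the search
	 * reports the word as full -- the false negative above. */
	if (find_next_zero_bit(&word, 6, 0) >= 6)
		printf("size=6: no free tag found\n");

	/* Full word depth: the free tag near the end is found. */
	printf("size=%u: free tag %u\n", depth,
	       find_next_zero_bit(&word, depth, 0));
	return 0;
}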
Another issue is when __bt_get() goes from one word set to the next.
It bumps the index, but not the last_tag associated with the
previous index. Bump that to be in the range of the new word.
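Concretely, each word in the map covers 2^bits_per_word tags, so the
first tag of word 'index' is index << bits_per_word, and that is the
value last_tag must take when the search moves on. A standalone sketch
of the arithmetic, using made-up word-size and word-count values
rather than the real blk_mq_bitmap_tags fields:

#include <stdio.h>

int main(void)
{
	/* Hypothetical map: 4 words of 2^5 = 32 tags each. */
	const unsigned int bits_per_word = 5, map_nr = 4;
	unsigned int index = 1, last_tag;

	/* Move to the next word and reset last_tag to that word's
	 * first tag, as the fixed __bt_get() does. */
	index++;
	last_tag = index << bits_per_word;	/* word 2 starts at tag 64 */

	/* Wrapping past the last word restarts both at zero. */
	if (index >= map_nr) {
		index = 0;
		last_tag = 0;
	}

	printf("index=%u, last_tag=%u\n", index, last_tag);
	return 0;
}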
Finally, clean up __bt_get() and __bt_get_word() a bit and get rid
of the goto and the unnecessary 'wrap' variable.
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block')
 -rw-r--r--  block/blk-mq-tag.c  |  43
 1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 60c9d4a93fe4..d4daee385a23 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -142,29 +142,30 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 
 static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 {
-	int tag, org_last_tag, end;
-	bool wrap = last_tag != 0;
+	int tag, org_last_tag = last_tag;
 
-	org_last_tag = last_tag;
-	end = bm->depth;
-	do {
-restart:
-		tag = find_next_zero_bit(&bm->word, end, last_tag);
-		if (unlikely(tag >= end)) {
+	while (1) {
+		tag = find_next_zero_bit(&bm->word, bm->depth, last_tag);
+		if (unlikely(tag >= bm->depth)) {
 			/*
-			 * We started with an offset, start from 0 to
+			 * We started with an offset, and we didn't reset the
+			 * offset to 0 in a failure case, so start from 0 to
 			 * exhaust the map.
 			 */
-			if (wrap) {
-				wrap = false;
-				end = org_last_tag;
-				last_tag = 0;
-				goto restart;
+			if (org_last_tag && last_tag) {
+				last_tag = org_last_tag = 0;
+				continue;
 			}
 			return -1;
 		}
+
+		if (!test_and_set_bit(tag, &bm->word))
+			break;
+
 		last_tag = tag + 1;
-	} while (test_and_set_bit(tag, &bm->word));
+		if (last_tag >= bm->depth - 1)
+			last_tag = 0;
+	}
 
 	return tag;
 }
@@ -199,9 +200,17 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
 			goto done;
 		}
 
-		last_tag = 0;
-		if (++index >= bt->map_nr)
+		/*
+		 * Jump to next index, and reset the last tag to be the
+		 * first tag of that index
+		 */
+		index++;
+		last_tag = (index << bt->bits_per_word);
+
+		if (index >= bt->map_nr) {
 			index = 0;
+			last_tag = 0;
+		}
 	}
 
 	*tag_cache = 0;