author	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-19 23:56:43 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-19 23:56:43 -0400
commit	f1d702487b3bc16466ad9b4e5c76277b6829d34c (patch)
tree	30398d00626a15477645cef81507808330f9439a
parent	58c72f94efb7d2f8dc918eaf43e7bbb20480fdb9 (diff)
parent	86fb5c56cfa26de5e91c9a50e2767a695dff366e (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A smaller collection of fixes for the block core that would be nice
  to have in -rc2.  This pull request contains:

   - Fixes for races in the wait/wakeup logic used in blk-mq from
     Alexander.  No issues have been observed, but it is definitely a
     bit flakey currently.  Alternatively, we may drop the cyclic
     wakeups going forward, but that needs more testing.

   - Some cleanups from Christoph.

   - Fix for an oops in null_blk if queue_mode=1 and softirq
     completions are used.  From me.

   - A fix for a regression caused by the chunk size setting.  It
     inadvertently used max_hw_sectors instead of max_sectors, which is
     incorrect, and causes hangs on btrfs multi-disk setups (where hw
     sectors apparently isn't set).  From me.

   - Removal of WQ_POWER_EFFICIENT in the kblockd creation.  This was
     a recent addition as well, but it actually breaks blk-mq which
     relies on strict scheduling.  If the workqueue power_efficient
     mode is turned on, this breaks blk-mq.  From Matias.

   - null_blk module parameter description fix from Mike"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: bitmap tag: fix races in bt_get() function
  blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt
  blk-mq: bitmap tag: fix races on shared ::wake_index fields
  block: blk_max_size_offset() should check ->max_sectors
  null_blk: fix softirq completions for queue_mode == 1
  blk-mq: merge blk_mq_drain_queue and __blk_mq_drain_queue
  blk-mq: properly drain stopped queues
  block: remove WQ_POWER_EFFICIENT from kblockd
  null_blk: fix name and description of 'queue_mode' module parameter
  block: remove elv_abort_queue and blk_abort_flushes
-rw-r--r--  block/blk-core.c           |  3
-rw-r--r--  block/blk-flush.c          | 38
-rw-r--r--  block/blk-mq-tag.c         | 59
-rw-r--r--  block/blk-mq-tag.h         |  2
-rw-r--r--  block/blk-mq.c             | 11
-rw-r--r--  block/blk.h                |  1
-rw-r--r--  block/elevator.c           | 20
-rw-r--r--  drivers/block/null_blk.c   |  7
-rw-r--r--  include/linux/blk-mq.h     |  2
-rw-r--r--  include/linux/blkdev.h     |  2
-rw-r--r--  include/linux/elevator.h   |  1
11 files changed, 50 insertions(+), 96 deletions(-)
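
[Annotation] The bitmap-tag patches below share one technique: fields such as wake_index are only round-robin hints, so racing updaters can use a single lossy compare-and-swap instead of a lock, and a lost update is harmless. A minimal userspace sketch of that pattern, using C11 atomics in place of the kernel's atomic_t API (BT_WAIT_QUEUES stands in for the kernel constant and must stay a power of two):

#include <stdatomic.h>
#include <stdio.h>

#define BT_WAIT_QUEUES 8	/* power of two, as in the kernel */

/* Pure computation: advance a queue index, wrapping at BT_WAIT_QUEUES. */
static inline int bt_index_inc(int index)
{
	return (index + 1) & (BT_WAIT_QUEUES - 1);
}

/*
 * Lossy atomic advance: if another thread moved the index first, the
 * compare-and-swap fails and we simply keep that thread's value.  That
 * is fine for a round-robin hint -- the index only ever moves forward
 * one step at a time and never holds an out-of-range value.
 */
static inline void bt_index_atomic_inc(atomic_int *index)
{
	int old = atomic_load(index);
	int new = bt_index_inc(old);

	atomic_compare_exchange_strong(index, &old, new);
}

int main(void)
{
	atomic_int wake_index = 0;

	for (int i = 0; i < 10; i++) {
		bt_index_atomic_inc(&wake_index);
		printf("wake_index = %d\n", atomic_load(&wake_index));
	}
	return 0;
}

Two threads calling bt_index_atomic_inc() concurrently may advance the index once instead of twice, but it never skips ahead or leaves the valid range, which is all the wakeup rotation needs.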
diff --git a/block/blk-core.c b/block/blk-core.c
index f6f6b9af3e3f..6f8dba161bfe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3312,8 +3312,7 @@ int __init blk_dev_init(void)
 
 	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
 	kblockd_workqueue = alloc_workqueue("kblockd",
-					    WQ_MEM_RECLAIM | WQ_HIGHPRI |
-					    WQ_POWER_EFFICIENT, 0);
+					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8ffee4b5f93d..3cb5e9e7108a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -422,44 +422,6 @@ void blk_insert_flush(struct request *rq)
 }
 
 /**
- * blk_abort_flushes - @q is being aborted, abort flush requests
- * @q: request_queue being aborted
- *
- * To be called from elv_abort_queue(). @q is being aborted. Prepare all
- * FLUSH/FUA requests for abortion.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock)
- */
-void blk_abort_flushes(struct request_queue *q)
-{
-	struct request *rq, *n;
-	int i;
-
-	/*
-	 * Requests in flight for data are already owned by the dispatch
-	 * queue or the device driver. Just restore for normal completion.
-	 */
-	list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
-		list_del_init(&rq->flush.list);
-		blk_flush_restore_request(rq);
-	}
-
-	/*
-	 * We need to give away requests on flush queues. Restore for
-	 * normal completion and put them on the dispatch queue.
-	 */
-	for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
-		list_for_each_entry_safe(rq, n, &q->flush_queue[i],
-					 flush.list) {
-			list_del_init(&rq->flush.list);
-			blk_flush_restore_request(rq);
-			list_add_tail(&rq->queuelist, &q->queue_head);
-		}
-	}
-}
-
-/**
  * blkdev_issue_flush - queue a flush
  * @bdev:	blockdev to issue flush for
  * @gfp_mask:	memory allocation flags (for bio_alloc)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1aab39f71d95..c1b92426c95e 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 	return bt_has_free_tags(&tags->bitmap_tags);
 }
 
-static inline void bt_index_inc(unsigned int *index)
+static inline int bt_index_inc(int index)
 {
-	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+	return (index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+static inline void bt_index_atomic_inc(atomic_t *index)
+{
+	int old = atomic_read(index);
+	int new = bt_index_inc(old);
+	atomic_cmpxchg(index, old, new);
 }
 
 /*
@@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 	int i, wake_index;
 
 	bt = &tags->bitmap_tags;
-	wake_index = bt->wake_index;
+	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		struct bt_wait_state *bs = &bt->bs[wake_index];
 
 		if (waitqueue_active(&bs->wait))
 			wake_up(&bs->wait);
 
-		bt_index_inc(&wake_index);
+		wake_index = bt_index_inc(wake_index);
 	}
 }
 
@@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 					  struct blk_mq_hw_ctx *hctx)
 {
 	struct bt_wait_state *bs;
+	int wait_index;
 
 	if (!hctx)
 		return &bt->bs[0];
 
-	bs = &bt->bs[hctx->wait_index];
-	bt_index_inc(&hctx->wait_index);
+	wait_index = atomic_read(&hctx->wait_index);
+	bs = &bt->bs[wait_index];
+	bt_index_atomic_inc(&hctx->wait_index);
 	return bs;
 }
 
@@ -239,18 +248,12 @@ static int bt_get(struct blk_mq_alloc_data *data,
 
 	bs = bt_wait_ptr(bt, hctx);
 	do {
-		bool was_empty;
-
-		was_empty = list_empty(&wait.task_list);
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
 		tag = __bt_get(hctx, bt, last_tag);
 		if (tag != -1)
 			break;
 
-		if (was_empty)
-			atomic_set(&bs->wait_cnt, bt->wake_cnt);
-
 		blk_mq_put_ctx(data->ctx);
 
 		io_schedule();
@@ -313,18 +316,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
 {
 	int i, wake_index;
 
-	wake_index = bt->wake_index;
+	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		struct bt_wait_state *bs = &bt->bs[wake_index];
 
 		if (waitqueue_active(&bs->wait)) {
-			if (wake_index != bt->wake_index)
-				bt->wake_index = wake_index;
+			int o = atomic_read(&bt->wake_index);
+			if (wake_index != o)
+				atomic_cmpxchg(&bt->wake_index, o, wake_index);
 
 			return bs;
 		}
 
-		bt_index_inc(&wake_index);
+		wake_index = bt_index_inc(wake_index);
 	}
 
 	return NULL;
@@ -334,6 +338,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 {
 	const int index = TAG_TO_INDEX(bt, tag);
 	struct bt_wait_state *bs;
+	int wait_cnt;
 
 	/*
 	 * The unlock memory barrier need to order access to req in free
@@ -342,10 +347,19 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
 
 	bs = bt_wake_ptr(bt);
-	if (bs && atomic_dec_and_test(&bs->wait_cnt)) {
-		atomic_set(&bs->wait_cnt, bt->wake_cnt);
-		bt_index_inc(&bt->wake_index);
+	if (!bs)
+		return;
+
+	wait_cnt = atomic_dec_return(&bs->wait_cnt);
+	if (wait_cnt == 0) {
+wake:
+		atomic_add(bt->wake_cnt, &bs->wait_cnt);
+		bt_index_atomic_inc(&bt->wake_index);
 		wake_up(&bs->wait);
+	} else if (wait_cnt < 0) {
+		wait_cnt = atomic_inc_return(&bs->wait_cnt);
+		if (!wait_cnt)
+			goto wake;
 	}
 }
 
@@ -499,10 +513,13 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < BT_WAIT_QUEUES; i++)
+	bt_update_count(bt, depth);
+
+	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		init_waitqueue_head(&bt->bs[i].wait);
+		atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt);
+	}
 
-	bt_update_count(bt, depth);
 	return 0;
 }
 
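[Annotation] The wake_cnt fix in bt_clear_tag() above handles releases racing past zero: atomic_dec_return() may observe a negative count while the releaser that hit zero is still refilling, so a racer undoes its decrement, and if that undo itself lands on zero the refill was consumed by other racers and the wakeup falls to the undoer. A standalone sketch of the compensation scheme with C11 atomics — an illustration of the idea, not the kernel code (wake_batch plays the role of bt->wake_cnt):

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Returns true when the caller should wake waiters (and, in the kernel,
 * rotate wake_index).  wait_cnt starts at wake_batch; each tag release
 * decrements it, and only the release that consumes the batch wakes.
 */
static bool tag_release_should_wake(atomic_int *wait_cnt, int wake_batch)
{
	int cnt = atomic_fetch_sub(wait_cnt, 1) - 1;	/* dec_return */

	if (cnt == 0) {
		/* We consumed the whole batch: refill and wake. */
		atomic_fetch_add(wait_cnt, wake_batch);
		return true;
	}
	if (cnt < 0) {
		/*
		 * We raced past zero before the winner's refill landed.
		 * Undo our decrement; if the undo is what brings the
		 * counter back to zero, the refill has already been
		 * eaten by other racers, so wake on their behalf.
		 */
		cnt = atomic_fetch_add(wait_cnt, 1) + 1;	/* inc_return */
		if (cnt == 0) {
			atomic_fetch_add(wait_cnt, wake_batch);
			return true;
		}
	}
	return false;
}

In the patched kernel code the wake path additionally calls bt_index_atomic_inc(&bt->wake_index) and wake_up(&bs->wait), as the diff above shows.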
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 98696a65d4d4..6206ed17ef76 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags {
 	unsigned int	map_nr;
 	struct blk_align_bitmap *map;
 
-	unsigned int wake_index;
+	atomic_t wake_index;
 	struct bt_wait_state *bs;
 };
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e11f5f8e0313..0ef2dc7f01bf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -109,7 +109,7 @@ static void blk_mq_queue_exit(struct request_queue *q)
 	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
 }
 
-static void __blk_mq_drain_queue(struct request_queue *q)
+void blk_mq_drain_queue(struct request_queue *q)
 {
 	while (true) {
 		s64 count;
@@ -120,7 +120,7 @@ static void __blk_mq_drain_queue(struct request_queue *q)
 
 		if (count == 0)
 			break;
-		blk_mq_run_queues(q, false);
+		blk_mq_start_hw_queues(q);
 		msleep(10);
 	}
 }
@@ -139,12 +139,7 @@ static void blk_mq_freeze_queue(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 
 	if (drain)
-		__blk_mq_drain_queue(q);
-}
-
-void blk_mq_drain_queue(struct request_queue *q)
-{
-	__blk_mq_drain_queue(q);
+		blk_mq_drain_queue(q);
 }
 
 static void blk_mq_unfreeze_queue(struct request_queue *q)
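[Annotation] Besides folding away the wrapper, the drain loop above now calls blk_mq_start_hw_queues() so queues a driver had stopped are restarted and can actually empty out ("properly drain stopped queues"). The shape is a plain poll-until-idle loop; a toy userspace sketch of that shape, with a simple atomic counter standing in for the queue's percpu usage counter:

#include <stdatomic.h>
#include <stdbool.h>
#include <unistd.h>

static atomic_int usage_counter;	/* stands in for q->mq_usage_counter */

static void start_hw_queues(void)
{
	/* In the kernel this restarts even stopped hardware queues. */
}

/* Poll until every in-flight request has completed. */
static void drain_queue(void)
{
	while (true) {
		if (atomic_load(&usage_counter) == 0)
			break;

		start_hw_queues();
		usleep(10 * 1000);	/* the kernel's msleep(10) */
	}
}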
diff --git a/block/blk.h b/block/blk.h
index 45385e9abf6f..6748c4f8d7a1 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq)
 #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED)
 
 void blk_insert_flush(struct request *rq);
-void blk_abort_flushes(struct request_queue *q);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
diff --git a/block/elevator.c b/block/elevator.c
index f35edddfe9b5..34bded18910e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw)
 	return ELV_MQUEUE_MAY;
 }
 
-void elv_abort_queue(struct request_queue *q)
-{
-	struct request *rq;
-
-	blk_abort_flushes(q);
-
-	while (!list_empty(&q->queue_head)) {
-		rq = list_entry_rq(q->queue_head.next);
-		rq->cmd_flags |= REQ_QUIET;
-		trace_block_rq_abort(q, rq);
-		/*
-		 * Mark this request as started so we don't trigger
-		 * any debug logic in the end I/O path.
-		 */
-		blk_start_request(rq);
-		__blk_end_request_all(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(elv_abort_queue);
-
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 77087a29b127..a3b042c4d448 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -79,7 +79,7 @@ MODULE_PARM_DESC(home_node, "Home node for the device");
 
 static int queue_mode = NULL_Q_MQ;
 module_param(queue_mode, int, S_IRUGO);
-MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)");
+MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
 
 static int gb = 250;
 module_param(gb, int, S_IRUGO);
@@ -227,7 +227,10 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 
 static void null_softirq_done_fn(struct request *rq)
 {
-	end_cmd(blk_mq_rq_to_pdu(rq));
+	if (queue_mode == NULL_Q_MQ)
+		end_cmd(blk_mq_rq_to_pdu(rq));
+	else
+		end_cmd(rq->special);
 }
 
 static inline void null_handle_cmd(struct nullb_cmd *cmd)
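[Annotation] The queue_mode=1 oops happened because blk-mq and the legacy request path store per-command data differently: blk-mq allocates the driver's pdu as a payload directly behind struct request (blk_mq_rq_to_pdu() is essentially pointer arithmetic past the request), while the legacy path hangs the command off rq->special. Calling the pdu helper on a legacy request therefore reads memory that was never a pdu. A toy illustration of the pdu layout, with hypothetical struct names:

#include <stdio.h>
#include <stdlib.h>

struct toy_request {
	int tag;
	/* driver pdu is laid out immediately after this struct */
};

struct toy_cmd {
	int result;
};

/* Same trick as blk_mq_rq_to_pdu(): the pdu sits right behind the request. */
static inline void *toy_rq_to_pdu(struct toy_request *rq)
{
	return rq + 1;
}

int main(void)
{
	/* One allocation holds the request and its pdu, as blk-mq does. */
	struct toy_request *rq = calloc(1, sizeof(*rq) + sizeof(struct toy_cmd));
	struct toy_cmd *cmd = toy_rq_to_pdu(rq);

	cmd->result = 42;
	printf("pdu result = %d\n", cmd->result);
	free(rq);
	return 0;
}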
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a002cf191427..eb726b9c5762 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -42,7 +42,7 @@ struct blk_mq_hw_ctx {
 	unsigned int		nr_ctx;
 	struct blk_mq_ctx	**ctxs;
 
-	unsigned int		wait_index;
+	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 31e11051f1ba..713f8b62b435 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -920,7 +920,7 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
 						 sector_t offset)
 {
 	if (!q->limits.chunk_sectors)
-		return q->limits.max_hw_sectors;
+		return q->limits.max_sectors;
 
 	return q->limits.chunk_sectors -
 			(offset & (q->limits.chunk_sectors - 1));
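[Annotation] blk_max_size_offset() caps an I/O so it cannot cross a chunk boundary; because chunk_sectors is a power of two, "sectors left in the current chunk" reduces to a bit-mask instead of a division, and with no chunking configured the fix now returns the software max_sectors limit rather than the hardware one. A small standalone illustration of the arithmetic (values made up for the example):

#include <stdio.h>

typedef unsigned long long sector_t;

/*
 * Sectors left before the next chunk boundary.  chunk_sectors must be a
 * power of two, so "offset mod chunk" is just a mask.
 */
static unsigned int max_size_at(unsigned int chunk_sectors,
				unsigned int max_sectors, sector_t offset)
{
	if (!chunk_sectors)
		return max_sectors;	/* no chunking: plain queue limit */

	return chunk_sectors - (offset & (chunk_sectors - 1));
}

int main(void)
{
	/* 256-sector chunks: an I/O at offset 200 may span 56 sectors. */
	printf("%u\n", max_size_at(256, 1024, 200));	/* -> 56 */
	/* At a chunk boundary the full chunk is available. */
	printf("%u\n", max_size_at(256, 1024, 512));	/* -> 256 */
	return 0;
}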
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4ff262e2bf37..e2a6bd7fb133 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -133,7 +133,6 @@ extern struct request *elv_latter_request(struct request_queue *, struct request
 extern int elv_register_queue(struct request_queue *q);
 extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, int);
-extern void elv_abort_queue(struct request_queue *);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *q, struct request *rq,
 			   struct bio *bio, gfp_t gfp_mask);