diff options
author | Jens Axboe <axboe@kernel.dk> | 2018-02-01 16:01:02 -0500 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2018-02-01 16:01:02 -0500 |
commit | 445251d0f4d329aa061f323546cd6388a3bb7ab5 (patch) | |
tree | b211ab115777e57b9b85d00257976ff7554b8441 | |
parent | 86ff7c2a80cd357f6156a53b354f6a0b357dc0c9 (diff) |
blk-mq: fix discard merge with scheduler attached
I ran into an issue on my laptop that triggered a bug on the
discard path:
WARNING: CPU: 2 PID: 207 at drivers/nvme/host/core.c:527 nvme_setup_cmd+0x3d3/0x430
Modules linked in: rfcomm fuse ctr ccm bnep arc4 binfmt_misc snd_hda_codec_hdmi nls_iso8859_1 nls_cp437 vfat snd_hda_codec_conexant fat snd_hda_codec_generic iwlmvm snd_hda_intel snd_hda_codec snd_hwdep mac80211 snd_hda_core snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq x86_pkg_temp_thermal intel_powerclamp kvm_intel uvcvideo iwlwifi btusb snd_seq_device videobuf2_vmalloc btintel videobuf2_memops kvm snd_timer videobuf2_v4l2 bluetooth irqbypass videobuf2_core aesni_intel aes_x86_64 crypto_simd cryptd snd glue_helper videodev cfg80211 ecdh_generic soundcore hid_generic usbhid hid i915 psmouse e1000e ptp pps_core xhci_pci xhci_hcd intel_gtt
CPU: 2 PID: 207 Comm: jbd2/nvme0n1p7- Tainted: G U 4.15.0+ #176
Hardware name: LENOVO 20FBCTO1WW/20FBCTO1WW, BIOS N1FET59W (1.33 ) 12/19/2017
RIP: 0010:nvme_setup_cmd+0x3d3/0x430
RSP: 0018:ffff880423e9f838 EFLAGS: 00010217
RAX: 0000000000000000 RBX: ffff880423e9f8c8 RCX: 0000000000010000
RDX: ffff88022b200010 RSI: 0000000000000002 RDI: 00000000327f0000
RBP: ffff880421251400 R08: ffff88022b200000 R09: 0000000000000009
R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000ffff
R13: ffff88042341e280 R14: 000000000000ffff R15: ffff880421251440
FS: 0000000000000000(0000) GS:ffff880441500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055b684795030 CR3: 0000000002e09006 CR4: 00000000001606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
nvme_queue_rq+0x40/0xa00
? __sbitmap_queue_get+0x24/0x90
? blk_mq_get_tag+0xa3/0x250
? wait_woken+0x80/0x80
? blk_mq_get_driver_tag+0x97/0xf0
blk_mq_dispatch_rq_list+0x7b/0x4a0
? deadline_remove_request+0x49/0xb0
blk_mq_do_dispatch_sched+0x4f/0xc0
blk_mq_sched_dispatch_requests+0x106/0x170
__blk_mq_run_hw_queue+0x53/0xa0
__blk_mq_delay_run_hw_queue+0x83/0xa0
blk_mq_run_hw_queue+0x6c/0xd0
blk_mq_sched_insert_request+0x96/0x140
__blk_mq_try_issue_directly+0x3d/0x190
blk_mq_try_issue_directly+0x30/0x70
blk_mq_make_request+0x1a4/0x6a0
generic_make_request+0xfd/0x2f0
? submit_bio+0x5c/0x110
submit_bio+0x5c/0x110
? __blkdev_issue_discard+0x152/0x200
submit_bio_wait+0x43/0x60
ext4_process_freed_data+0x1cd/0x440
? account_page_dirtied+0xe2/0x1a0
ext4_journal_commit_callback+0x4a/0xc0
jbd2_journal_commit_transaction+0x17e2/0x19e0
? kjournald2+0xb0/0x250
kjournald2+0xb0/0x250
? wait_woken+0x80/0x80
? commit_timeout+0x10/0x10
kthread+0x111/0x130
? kthread_create_worker_on_cpu+0x50/0x50
? do_group_exit+0x3a/0xa0
ret_from_fork+0x1f/0x30
Code: 73 89 c1 83 ce 10 c1 e1 10 09 ca 83 f8 04 0f 87 0f ff ff ff 8b 4d 20 48 8b 7d 00 c1 e9 09 48 01 8c c7 00 08 00 00 e9 f8 fe ff ff <0f> ff 4c 89 c7 41 bc 0a 00 00 00 e8 0d 78 d6 ff e9 a1 fc ff ff
---[ end trace 50d361cc444506c8 ]---
print_req_error: I/O error, dev nvme0n1, sector 847167488
Decoding the assembly, the request claims to have 0xffff segments,
while nvme counts two. This turns out to be because we don't check
for a data carrying request on the mq scheduler path, and since
blk_phys_contig_segment() returns true for a non-data request,
we decrement the initial segment count of 0 and end up with
0xffff in the unsigned short.
There are a few issues here:
1) We should initialize the segment count for a discard to 1.
2) The discard merging is currently using the data limits for
segments and sectors.
Fix this up by having attempt_merge() correctly identify the
request, and by initializing the segment count correctly
for discards.
This can only be triggered with mq-deadline on discard capable
devices right now, which isn't a common configuration.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | block/blk-core.c | 2 | ||||
-rw-r--r-- | block/blk-merge.c | 29 |
2 files changed, 28 insertions, 3 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 134fd34b681f..d0d104268f1a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -3283,6 +3283,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | |||
3283 | { | 3283 | { |
3284 | if (bio_has_data(bio)) | 3284 | if (bio_has_data(bio)) |
3285 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 3285 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
3286 | else if (bio_op(bio) == REQ_OP_DISCARD) | ||
3287 | rq->nr_phys_segments = 1; | ||
3286 | 3288 | ||
3287 | rq->__data_len = bio->bi_iter.bi_size; | 3289 | rq->__data_len = bio->bi_iter.bi_size; |
3288 | rq->bio = rq->biotail = bio; | 3290 | rq->bio = rq->biotail = bio; |
diff --git a/block/blk-merge.c b/block/blk-merge.c index 8452fc7164cc..782940c65d8a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -550,6 +550,24 @@ static bool req_no_special_merge(struct request *req) | |||
550 | return !q->mq_ops && req->special; | 550 | return !q->mq_ops && req->special; |
551 | } | 551 | } |
552 | 552 | ||
553 | static bool req_attempt_discard_merge(struct request_queue *q, struct request *req, | ||
554 | struct request *next) | ||
555 | { | ||
556 | unsigned short segments = blk_rq_nr_discard_segments(req); | ||
557 | |||
558 | if (segments >= queue_max_discard_segments(q)) | ||
559 | goto no_merge; | ||
560 | if (blk_rq_sectors(req) + bio_sectors(next->bio) > | ||
561 | blk_rq_get_max_sectors(req, blk_rq_pos(req))) | ||
562 | goto no_merge; | ||
563 | |||
564 | req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next); | ||
565 | return true; | ||
566 | no_merge: | ||
567 | req_set_nomerge(q, req); | ||
568 | return false; | ||
569 | } | ||
570 | |||
553 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | 571 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, |
554 | struct request *next) | 572 | struct request *next) |
555 | { | 573 | { |
@@ -683,9 +701,13 @@ static struct request *attempt_merge(struct request_queue *q, | |||
683 | * If we are allowed to merge, then append bio list | 701 | * If we are allowed to merge, then append bio list |
684 | * from next to rq and release next. merge_requests_fn | 702 | * from next to rq and release next. merge_requests_fn |
685 | * will have updated segment counts, update sector | 703 | * will have updated segment counts, update sector |
686 | * counts here. | 704 | * counts here. Handle DISCARDs separately, as they |
705 | * have separate settings. | ||
687 | */ | 706 | */ |
688 | if (!ll_merge_requests_fn(q, req, next)) | 707 | if (req_op(req) == REQ_OP_DISCARD) { |
708 | if (!req_attempt_discard_merge(q, req, next)) | ||
709 | return NULL; | ||
710 | } else if (!ll_merge_requests_fn(q, req, next)) | ||
689 | return NULL; | 711 | return NULL; |
690 | 712 | ||
691 | /* | 713 | /* |
@@ -715,7 +737,8 @@ static struct request *attempt_merge(struct request_queue *q, | |||
715 | 737 | ||
716 | req->__data_len += blk_rq_bytes(next); | 738 | req->__data_len += blk_rq_bytes(next); |
717 | 739 | ||
718 | elv_merge_requests(q, req, next); | 740 | if (req_op(req) != REQ_OP_DISCARD) |
741 | elv_merge_requests(q, req, next); | ||
719 | 742 | ||
720 | /* | 743 | /* |
721 | * 'next' is going away, so update stats accordingly | 744 | * 'next' is going away, so update stats accordingly |