author    Keith Busch <keith.busch@intel.com>          2014-04-29 13:41:29 -0400
committer Matthew Wilcox <matthew.r.wilcox@intel.com>  2014-05-05 10:54:02 -0400
commit    53562be74bd06bbe74d2acf3caca5398f8eeb160 (patch)
tree      7e070ef357ad00df7d971d5672711e17abda2232
parent    a7d2ce2832d84e0182585f63bf96ca7323b3aee7 (diff)
NVMe: Flush with data support
It is possible that a filesystem may send a flush-flagged bio with write data. There is no such composite NVMe command, so the driver sends the flush and the write separately. The device is allowed to execute these commands in any order, so it was possible for the driver to end the bio after the write completed but while the flush was still active. We don't want to let a filesystem believe a flush succeeded before it really has; this could cause data corruption on a power loss between these events. To fix this, the patch splits the flush and write into chained bios.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
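For illustration, a minimal sketch of the chained-bio split described above, outside the driver. The submit_bio_to_device() helper is hypothetical and stands in for however bios are actually queued for submission (the real patch puts both bios on the queue's congestion list and wakes the nvme thread):

/*
 * Simplified sketch: split a flush-flagged write into an empty flush
 * bio chained to the original data bio. submit_bio_to_device() is a
 * hypothetical stand-in for the driver's submission path.
 */
static int split_flush_write(struct bio *bio)
{
	struct bio *flush = bio_clone(bio, GFP_ATOMIC);

	if (!flush)
		return -ENOMEM;

	/* The clone becomes an empty flush; the original keeps the data. */
	flush->bi_iter.bi_size = 0;
	flush->bi_phys_segments = 0;
	bio->bi_rw &= ~REQ_FLUSH;

	/*
	 * Chain the flush to the original bio: the original's completion
	 * is not reported until the chained flush has also completed, so
	 * the filesystem never sees success before the flush is done.
	 */
	bio_chain(flush, bio);

	submit_bio_to_device(flush);
	submit_bio_to_device(bio);
	return 0;
}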
-rw-r--r--	drivers/block/nvme-core.c	44
-rw-r--r--	include/linux/nvme.h	 1
2 files changed, 24 insertions(+), 21 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e7c4fdb6a651..cd8a8bc711cc 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -197,16 +197,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 #define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID	(0x314 + CMD_CTX_BASE)
-#define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
-#define CMD_CTX_ABORT		(0x31C + CMD_CTX_BASE)
+#define CMD_CTX_ABORT		(0x318 + CMD_CTX_BASE)
 
 static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	if (ctx == CMD_CTX_CANCELLED)
 		return;
-	if (ctx == CMD_CTX_FLUSH)
-		return;
 	if (ctx == CMD_CTX_ABORT) {
 		++nvmeq->dev->abort_limit;
 		return;
@@ -629,16 +626,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	return 0;
 }
 
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
-{
-	int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
-					special_completion, NVME_IO_TIMEOUT);
-	if (unlikely(cmdid < 0))
-		return cmdid;
-
-	return nvme_submit_flush(nvmeq, ns, cmdid);
-}
-
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 {
 	struct bio *bio = iod->private;
@@ -654,7 +641,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
+	if (bio->bi_rw & REQ_FLUSH)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
 
 	control = 0;
@@ -688,6 +675,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 	return 0;
 }
 
+static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio)
+{
+	struct bio *split = bio_clone(bio, GFP_ATOMIC);
+	if (!split)
+		return -ENOMEM;
+
+	split->bi_iter.bi_size = 0;
+	split->bi_phys_segments = 0;
+	bio->bi_rw &= ~REQ_FLUSH;
+	bio_chain(split, bio);
+
+	if (!waitqueue_active(&nvmeq->sq_full))
+		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
+	wake_up_process(nvme_thread);
+
+	return 0;
+}
+
 /*
  * Called with local interrupts disabled and the q_lock held. May not sleep.
  */
@@ -698,11 +705,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	int psegs = bio_phys_segments(ns->queue, bio);
 	int result;
 
-	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-		result = nvme_submit_flush_data(nvmeq, ns);
-		if (result)
-			return result;
-	}
+	if ((bio->bi_rw & REQ_FLUSH) && psegs)
+		return nvme_split_flush_data(nvmeq, bio);
 
 	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
 	if (!iod)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6266373d3147..1813cfdb7e80 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -156,7 +156,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 			struct nvme_iod *iod);
 int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
 int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
 							u32 *result);
 int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,